from trl import SFTConfig
import wandb
wandb.init(mode="disabled")  # Disable W&B logging (belt-and-braces with report_to="none")

# Training arguments for supervised fine-tuning (SFT) with TRL.
# NOTE(review): metric_for_best_model="train_loss" / greater_is_better were
# removed — those fields are only consumed when evaluation and
# load_best_model_at_end are enabled, and "train_loss" is not a valid eval
# metric name (eval metrics are keyed "eval_*"). Leaving them set with
# evaluation disabled is a common cause of TrainingArguments errors.
training_args = SFTConfig(
    output_dir="qwen_sign_language_interpretation",  # Directory to save checkpoints
    max_steps=250,                        # Stop after a fixed number of optimizer steps
    per_device_train_batch_size=1,        # Batch size per device for training
    per_device_eval_batch_size=1,         # Batch size per device for evaluation
    gradient_accumulation_steps=16,       # Effective batch size = 1 * 16 = 16
    gradient_checkpointing=True,          # Trade compute for memory
    # Non-reentrant checkpointing is the recommended mode and avoids
    # warnings/incompatibilities with DeepSpeed and recent PyTorch.
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # Optimizer and scheduler settings
    optim="adamw_torch_fused",            # Fused AdamW (requires CUDA + recent torch)
    learning_rate=2e-4,                   # Learning rate for training
    lr_scheduler_type="constant",         # No decay; warmup_ratio still applies
    # Logging and checkpointing
    logging_steps=10,                     # Steps interval for logging
    # eval_steps=10,                      # Steps interval for evaluation (eval disabled)
    # eval_strategy="steps",              # Strategy for evaluation (eval disabled)
    save_strategy="steps",                # Save checkpoints by step count
    save_steps=20,                        # Steps interval for saving
    # Mixed precision and gradient settings
    bf16=True,                            # Use bfloat16 precision (Ampere+ GPUs)
    # NOTE(review): hard-coded Colab path — confirm this file exists when
    # running outside Colab.
    deepspeed="/content/zero_stage3_offload_config.json",
    max_grad_norm=0.3,                    # Maximum norm for gradient clipping
    warmup_ratio=0.03,                    # Ratio of total steps for warmup
    report_to="none",                     # Disable external metric reporting
    # Dataset configuration: the collator prepares model inputs itself, so
    # skip TRL's default text-field tokenization/packing.
    dataset_text_field="",                # Unused; inputs come pre-prepared
    dataset_kwargs={"skip_prepare_dataset": True},
    max_seq_length=1024,                  # Maximum sequence length for input
    # Keep all dataset columns — set in the constructor rather than by
    # mutating the config object afterwards.
    remove_unused_columns=False,
)
# NOTE: author's question — "This is my code; I'm getting the error below. Please help me resolve it." (error text not included)