I am using SFTTrainer with LoRA to fine-tune a Gemma model. Validation loss is reported normally, but the training loss shows up as “No log”.
Here is my configuration:
from transformers import TrainingArguments
from trl import SFTTrainer

args = TrainingArguments(
    per_device_train_batch_size=6,
    per_device_eval_batch_size=6,
    gradient_accumulation_steps=4,
    save_steps=200,
    save_total_limit=30,
    save_strategy="steps",
    evaluation_strategy="steps",
    eval_steps=200,
    logging_steps=200,
    warmup_steps=2,
    num_train_epochs=2,
    # max_steps=2,
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    weight_decay=0.001,
    max_grad_norm=1.0,
    fp16=False,
    bf16=True,
    logging_strategy="epoch",
    output_dir=output_dir,
    optim="paged_adamw_8bit",
    seed=42,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # accelerator_config={"split_batches": True},
    report_to=None,
)
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    args=args,
    peft_config=lora_config,
    dataset_text_field=dataset_text_field,
    max_seq_length=512,
    tokenizer=tokenizer,
    callbacks=[SavePeftModelCallback],  # this is crashing the kernel
    # data_collator=collator,
)
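For context, SavePeftModelCallback follows the usual pattern of saving only the PEFT adapter weights at each checkpoint instead of the full model. This is only a minimal sketch of that pattern, not my exact code (the directory names are illustrative):

from transformers import TrainerCallback
import os

class SavePeftModelCallback(TrainerCallback):
    # On every checkpoint save, write just the adapter weights into the
    # checkpoint folder; the base model weights are not duplicated.
    def on_save(self, args, state, control, **kwargs):
        checkpoint_dir = os.path.join(args.output_dir, f"checkpoint-{state.global_step}")
        peft_dir = os.path.join(checkpoint_dir, "adapter_model")
        kwargs["model"].save_pretrained(peft_dir)
        return control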