I’m training Llama2 with QLoRA and the training loss fluctuates from step to step rather than decreasing steadily. Can anyone explain what might be causing this?
I have 300 data points in the training dataset and 100 in the validation dataset.
import torch
import transformers
from transformers import BitsAndBytesConfig
from peft import LoraConfig

# LoRA adapter configuration
config = LoraConfig(
    r=8,
    lora_alpha=32,
    # target_modules=["query_key_value"],
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

# 4-bit NF4 quantization for the base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit",
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()
Step Training Loss
1 9.978800
2 10.143000
3 10.144600
4 9.815600
5 10.269600
6 9.560200
7 9.600600
8 9.348600
9 9.374900
10 9.486000
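For context, with per_device_train_batch_size=1 and gradient_accumulation_steps=4, each step above corresponds to an effective batch of only 4 examples, so the whole 10-step run touches at most 40 of the 300 training examples. A quick sanity check:

# Effective batch size and data coverage for this run
effective_batch = 1 * 4                 # per_device_train_batch_size * gradient_accumulation_steps
examples_seen = effective_batch * 10    # max_steps -> at most 40 examples
print(examples_seen / 300)              # ~0.13 of the training set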