I am trying to fine-tune the model TheBloke/Mistral-7B-Instruct-v0.1-GPTQ, but training fails with:

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

How can I fix this error?
Here is the code that is throwing the error:
import transformers
from datetime import datetime

# model, tokenizer, tokenized_train_dataset and tokenized_val_dataset
# are defined in earlier cells (loading sketch below)
project = "Mixtral-alpaca-finance-finetune"
base_model_name = "mixtral"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    # dataset_text_field="text",   # SFTTrainer arguments, not accepted by Trainer
    # max_seq_length=512,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=5,
        per_device_train_batch_size=1,
        gradient_checkpointing=True,
        gradient_accumulation_steps=4,
        max_steps=1000,
        learning_rate=2.5e-5,
        lr_scheduler_type="cosine",
        logging_steps=25,
        fp16=True,
        optim="paged_adamw_8bit",
        logging_dir="./logs",          # directory for storing logs
        save_strategy="steps",         # save a checkpoint every save_steps
        save_steps=50,                 # save checkpoints every 50 steps
        evaluation_strategy="steps",   # evaluate every eval_steps
        eval_steps=50,                 # evaluate every 50 steps
        do_eval=True,                  # run evaluation during training
        # report_to="wandb",           # uncomment to log to Weights & Biases
        run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"  # W&B run name (optional)
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings; re-enable for inference!
trainer.train()
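For reference, model and tokenizer come from earlier cells. A minimal sketch of that setup, assuming the standard GPTQ loading path in transformers (the exact arguments in my notebook may differ):

from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_id = "TheBloke/Mistral-7B-Instruct-v0.1-GPTQ"

# Assumed loading code: transformers dispatches GPTQ checkpoints through
# optimum/auto-gptq; the quantized weights themselves are frozen and
# carry no gradients.
model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(base_model_id)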
I tried upgrading torch, accelerate, and auto-gptq, but the error persists.