When I try to use get_peft_model with a Trainer object, it throws KeyError: 'eval_qkw'. When I don't use get_peft_model, everything works fine.
Here is my code; my transformers version is 4.38.1.
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import cohen_kappa_score
from transformers import AutoModel, EvalPrediction, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model

def compute_metrics(p: EvalPrediction):
    y_pred = p.predictions.clip(1, 6).round()
    qwk = cohen_kappa_score(p.label_ids, y_pred, weights="quadratic")
    metrics = {'qwk': qwk}
    return metrics
class MyModel(nn.Module):
    def __init__(self, A):
        super().__init__()
        self.bert = AutoModel.from_pretrained(modelpath)
        dim = self.bert.config.hidden_size
        self.fc = nn.Linear(dim, 1)
        self.A = A

    def forward(self, input_ids, attention_mask=None, labels=None):
        # [CLS] token representation -> single regression score, shifted by A
        hidden = self.bert(input_ids, attention_mask=attention_mask)[0][:, 0]
        y = self.fc(hidden).squeeze(1) + self.A
        if labels is not None:
            loss = F.mse_loss(y, labels)
            return loss, y
        return y  # modeling_outputs.SequenceClassifierOutput(logits=y)
peft_config = LoraConfig(
    target_modules=["query_proj", "value_proj"],
    modules_to_save=["fc"],
    inference_mode=False,
    r=32,
    lora_alpha=64,
    lora_dropout=0.1,
)
model = MyModel(A=np.mean(tokenized_datasets['train']['labels']))
model = get_peft_model(model, peft_config)  # adding this line throws the KeyError
training_args = TrainingArguments(
    output_dir=checkpoint,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=5,
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    gradient_accumulation_steps=1,
    metric_for_best_model=metric_name,
    greater_is_better=True,
    fp16=True,
    save_total_limit=1,
    load_best_model_at_end=True,
    seed=41,
    # deepspeed='./deepspeed_config.json'
)
trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    # callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)
trainer.train()
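
For reference, a minimal sketch of how I can inspect which metric keys the Trainer actually reports, using the same trainer object defined above; metric_for_best_model is looked up against these keys after an "eval_" prefix is added, which is where the KeyError is raised.

# Sketch: run one evaluation pass (before training) and print the metric keys
# the Trainer sees; metric_for_best_model must correspond to one of these keys
# (the Trainer prepends "eval_" if the name does not already start with it).
eval_metrics = trainer.evaluate()
print(sorted(eval_metrics.keys()))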