I'm working on a multi-task classification model with DistilBERT. Training completes the first epoch, then evaluation starts, and at the end of the evaluation it throws the error below.
I'm out of ideas and can't figure out what is going on here. If anyone could shed some light on this, I would really appreciate it.
Code:
import evaluate
import numpy as np
from transformers import Trainer, TrainingArguments

# Defining the metrics: one F1 metric per task
LINE_METRIC = evaluate.load("f1")
CAT_METRIC = evaluate.load("f1")
SUB_CAT_METRIC = evaluate.load("f1")
MOTIVE_METRIC = evaluate.load("f1")

def compute_metrics(eval_pred):
    print(eval_pred)
    # eval_pred is (predictions, label_ids); both are tuples of four arrays,
    # one per task
    all_logits, all_labels = eval_pred
    logits_line, logits_cat, logits_sub_cat, logits_motive = all_logits
    line_labels, cat_labels, sub_cat_labels, motive_labels = all_labels
    line_predictions = np.argmax(logits_line, axis=-1)
    cat_predictions = np.argmax(logits_cat, axis=-1)
    sub_cat_predictions = np.argmax(logits_sub_cat, axis=-1)
    motive_predictions = np.argmax(logits_motive, axis=-1)
    print("PRED")
    print(line_predictions, cat_predictions, sub_cat_predictions, motive_predictions)
    line_computed_metrics = LINE_METRIC.compute(predictions=line_predictions, references=line_labels, average='weighted')
    cat_computed_metrics = CAT_METRIC.compute(predictions=cat_predictions, references=cat_labels, average='weighted')
    sub_cat_computed_metrics = SUB_CAT_METRIC.compute(predictions=sub_cat_predictions, references=sub_cat_labels, average='weighted')
    motive_computed_metrics = MOTIVE_METRIC.compute(predictions=motive_predictions, references=motive_labels, average='weighted')
    print("SCORE")
    print(line_computed_metrics, cat_computed_metrics, sub_cat_computed_metrics, motive_computed_metrics)
    return {
        'f1_line': line_computed_metrics['f1'],
        'f1_cat': cat_computed_metrics['f1'],
        'f1_sub_cat': sub_cat_computed_metrics['f1'],
        'f1_motive': motive_computed_metrics['f1'],
    }
output_directory = RESULTS_DIRECTORY
evaluation_strategy = 'epoch'
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 2
learning_rate = 2e-5
weight_decay = 0.01
max_grad_norm = 1
num_train_epochs = NUM_TRAIN_EPOCHS
lr_scheduler_type = 'linear'
warmup_ratio = 0.05
logging_dir = LOGGING_DIRECTORY
logging_strategy = 'epoch'
save_strategy = 'epoch'
save_total_limit = 1
label_names = ['line_labels', 'cat_labels', 'sub_cat_labels', 'motive_labels']
load_best_model_at_end = True
metric_for_best_model = 'eval_f1_cat'
greater_is_better = True
label_smoothing_factor = 0
#report_to = 'tensorboard'
gradient_checkpointing = False
# Setup training arguments
training_args = TrainingArguments(
    output_dir=output_directory,
    evaluation_strategy=evaluation_strategy,
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    logging_dir=logging_dir,
    label_names=label_names,
    max_grad_norm=max_grad_norm,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    logging_strategy=logging_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    #metric_for_best_model=metric_for_best_model,
    #greater_is_better=greater_is_better,
    label_smoothing_factor=label_smoothing_factor,
    #report_to=report_to,
    gradient_checkpointing=gradient_checkpointing,
)
#early_stop_callback = EarlyStoppingCallback(3)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    #tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    #callbacks=[early_stop_callback]
)
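For context, model is a custom multi-task head on top of DistilBERT, roughly like the sketch below (the class name, label counts, and loss weighting here are placeholders rather than my exact code; the real model returns four logits tensors in the same order as label_names):

import torch.nn as nn
from transformers import DistilBertModel

class MultiTaskDistilBert(nn.Module):
    # num_* are placeholder label counts for the four tasks
    def __init__(self, num_line=2, num_cat=5, num_sub_cat=10, num_motive=4):
        super().__init__()
        self.backbone = DistilBertModel.from_pretrained('distilbert-base-uncased')
        hidden = self.backbone.config.dim  # 768 for distilbert-base
        self.line_head = nn.Linear(hidden, num_line)
        self.cat_head = nn.Linear(hidden, num_cat)
        self.sub_cat_head = nn.Linear(hidden, num_sub_cat)
        self.motive_head = nn.Linear(hidden, num_motive)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask=None, line_labels=None,
                cat_labels=None, sub_cat_labels=None, motive_labels=None):
        # Pool with the first ([CLS]) token, one linear head per task
        pooled = self.backbone(input_ids, attention_mask=attention_mask).last_hidden_state[:, 0]
        logits = (self.line_head(pooled), self.cat_head(pooled),
                  self.sub_cat_head(pooled), self.motive_head(pooled))
        if line_labels is None:
            return logits
        labels = (line_labels, cat_labels, sub_cat_labels, motive_labels)
        # Total loss = sum of the four per-task cross-entropy losses
        loss = sum(self.loss_fn(lg, lb) for lg, lb in zip(logits, labels))
        return (loss,) + logits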
Error:
KeyError Traceback (most recent call last)
Cell In[36], line 1
----> 1 trainer.train()
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1859, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1857 hf_hub_utils.enable_progress_bars()
1858 else:
-> 1859 return inner_training_loop(
1860 args=args,
1861 resume_from_checkpoint=resume_from_checkpoint,
1862 trial=trial,
1863 ignore_keys_for_eval=ignore_keys_for_eval,
1864 )
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2298, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2295 self.control.should_training_stop = True
2297 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
-> 2298 self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
2300 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
2301 if is_torch_xla_available():
2302 # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2673, in Trainer._maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
2670 self.lr_scheduler.step(metrics[metric_to_check])
2672 if self.control.should_save:
-> 2673 self._save_checkpoint(model, trial, metrics=metrics)
2674 self.control = self.callback_handler.on_save(self.args, self.state, self.control)
File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2765, in Trainer._save_checkpoint(self, model, trial, metrics)
2763 if not metric_to_check.startswith("eval_"):
2764 metric_to_check = f"eval_{metric_to_check}"
-> 2765 metric_value = metrics[metric_to_check]
2767 operator = np.greater if self.args.greater_is_better else np.less
2768 if (
2769 self.state.best_metric is None
2770 or self.state.best_model_checkpoint is None
2771 or operator(metric_value, self.state.best_metric)
2772 ):
KeyError: 'eval_loss'