KeyError: 'eval_loss' during evaluation after the first epoch

I’m working on a multi-task classification model with DistilBert. Training runs through the first epoch fine, then evaluation starts and the error below is thrown at the end of the evaluation loop.
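For context, the model is a DistilBert backbone with four linear classification heads on top of the [CLS] representation. Below is a simplified sketch of it (the class name, head sizes, and the straight sum of the four losses are placeholders here; the real model also has dropout and the actual per-task class counts):

from torch import nn
from transformers import DistilBertModel

class MultiTaskDistilBert(nn.Module):
    """DistilBERT backbone shared by four linear classification heads."""

    def __init__(self, n_line, n_cat, n_sub_cat, n_motive):
        super().__init__()
        self.backbone = DistilBertModel.from_pretrained("distilbert-base-uncased")
        hidden = self.backbone.config.dim  # 768 for distilbert-base
        self.line_head = nn.Linear(hidden, n_line)
        self.cat_head = nn.Linear(hidden, n_cat)
        self.sub_cat_head = nn.Linear(hidden, n_sub_cat)
        self.motive_head = nn.Linear(hidden, n_motive)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, attention_mask=None, line_labels=None,
                cat_labels=None, sub_cat_labels=None, motive_labels=None):
        out = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
        pooled = out.last_hidden_state[:, 0]  # [CLS] token representation
        logits = (
            self.line_head(pooled),
            self.cat_head(pooled),
            self.sub_cat_head(pooled),
            self.motive_head(pooled),
        )
        loss = None
        if line_labels is not None:
            labels = (line_labels, cat_labels, sub_cat_labels, motive_labels)
            loss = sum(self.loss_fn(lg, lb) for lg, lb in zip(logits, labels))
        return (loss, *logits) if loss is not None else logits

The four sets of logits come out in this order, which is what compute_metrics below unpacks.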

I really need help figuring out what is going on here; I’m out of options and can’t make sense of it. If anyone could shed some light on this I would appreciate it.

Code:

import evaluate
import numpy as np
from transformers import Trainer, TrainingArguments

# Defining the metrics

LINE_METRIC = evaluate.load("f1")
CAT_METRIC = evaluate.load("f1")
SUB_CAT_METRIC = evaluate.load("f1")
MOTIVE_METRIC = evaluate.load("f1")

def compute_metrics(eval_pred):
    print(eval_pred)
    # eval_pred unpacks into (all_logits, all_labels), each a tuple with one entry per task
    all_logits, all_labels = eval_pred
    logits_line, logits_cat, logits_sub_cat, logits_motive = all_logits 
    line_labels, cat_labels, sub_cat_labels, motive_labels = all_labels

    line_predictions = np.argmax(logits_line, axis=-1)
    cat_predictions = np.argmax(logits_cat, axis=-1)
    sub_cat_predictions = np.argmax(logits_sub_cat, axis=-1)
    motive_predictions = np.argmax(logits_motive, axis=-1)
    
    print("PRED")
    print(line_predictions, cat_predictions, sub_cat_predictions, motive_predictions)
    
    line_computed_metrics = LINE_METRIC.compute(predictions=line_predictions, references=line_labels, average='weighted')
    cat_computed_metrics = CAT_METRIC.compute(predictions=cat_predictions, references=cat_labels, average='weighted')
    sub_cat_computed_metrics = SUB_CAT_METRIC.compute(predictions=sub_cat_predictions, references=sub_cat_labels, average='weighted')
    motive_computed_metrics = MOTIVE_METRIC.compute(predictions=motive_predictions, references=motive_labels, average='weighted')

    print("SCORE")
    print(line_computed_metrics, cat_computed_metrics, sub_cat_computed_metrics, motive_computed_metrics)

    return {
        'f1_line': line_computed_metrics['f1'],
        'f1_cat': cat_computed_metrics['f1'],
        'f1_sub_cat': sub_cat_computed_metrics['f1'],
        'f1_motive': motive_computed_metrics['f1'],
    }
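For what it’s worth, calling compute_metrics by hand with made-up arrays does return the four F1 keys, so the metric function itself seems to work in isolation (the shapes and values below are dummy data, 3 examples and 2 classes per task):

dummy_logits = tuple(np.random.randn(3, 2) for _ in range(4))
dummy_labels = tuple(np.array([0, 1, 1]) for _ in range(4))
print(compute_metrics((dummy_logits, dummy_labels)))
# -> {'f1_line': ..., 'f1_cat': ..., 'f1_sub_cat': ..., 'f1_motive': ...}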

# Training hyperparameters
output_directory = RESULTS_DIRECTORY
evaluation_strategy = 'epoch'
per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 2
learning_rate = 2e-5
weight_decay = 0.01
max_grad_norm = 1
num_train_epochs = NUM_TRAIN_EPOCHS
lr_scheduler_type = 'linear'
warmup_ratio = 0.05
logging_dir = LOGGING_DIRECTORY
logging_strategy = 'epoch'
save_strategy = 'epoch'
save_total_limit = 1
label_names = ['line_labels', 'cat_labels', 'sub_cal_label','motive_labels']
load_best_model_at_end = True
metric_for_best_model = 'eval_f1_cat'
greater_is_better = True
label_smoothing_factor = 0
#report_to = 'tensorboard'
gradient_checkpointing = False
# Setup training arguments
training_args = TrainingArguments(
    output_dir=output_directory,
    evaluation_strategy=evaluation_strategy,
    learning_rate=learning_rate,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    num_train_epochs=num_train_epochs,
    weight_decay=weight_decay,
    logging_dir=logging_dir,
    label_names=label_names,
    max_grad_norm=max_grad_norm,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    logging_strategy=logging_strategy,
    save_strategy=save_strategy,
    save_total_limit=save_total_limit,
    load_best_model_at_end=load_best_model_at_end,
    #metric_for_best_model=metric_for_best_model,
    #greater_is_better=greater_is_better,
    label_smoothing_factor=label_smoothing_factor,
    #report_to=report_to,
    gradient_checkpointing=gradient_checkpointing
)

#early_stop_callback = EarlyStoppingCallback(3)
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    #tokenizer=tokenizer,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    #callbacks=[early_stop_callback]
)
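
Training is then kicked off with a plain call:

trainer.train()

which is what produces the traceback below.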

Error:

KeyError                                  Traceback (most recent call last)
Cell In[36], line 1
----> 1 trainer.train()

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:1859, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
   1857         hf_hub_utils.enable_progress_bars()
   1858 else:
-> 1859     return inner_training_loop(
   1860         args=args,
   1861         resume_from_checkpoint=resume_from_checkpoint,
   1862         trial=trial,
   1863         ignore_keys_for_eval=ignore_keys_for_eval,
   1864     )

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2298, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
   2295     self.control.should_training_stop = True
   2297 self.control = self.callback_handler.on_epoch_end(args, self.state, self.control)
-> 2298 self._maybe_log_save_evaluate(tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
   2300 if DebugOption.TPU_METRICS_DEBUG in self.args.debug:
   2301     if is_torch_xla_available():
   2302         # tpu-comment: Logging debug metrics for PyTorch/XLA (compile, execute times, ops, etc.)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2673, in Trainer._maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval)
   2670         self.lr_scheduler.step(metrics[metric_to_check])
   2672 if self.control.should_save:
-> 2673     self._save_checkpoint(model, trial, metrics=metrics)
   2674     self.control = self.callback_handler.on_save(self.args, self.state, self.control)

File /opt/conda/lib/python3.10/site-packages/transformers/trainer.py:2765, in Trainer._save_checkpoint(self, model, trial, metrics)
   2763 if not metric_to_check.startswith("eval_"):
   2764     metric_to_check = f"eval_{metric_to_check}"
-> 2765 metric_value = metrics[metric_to_check]
   2767 operator = np.greater if self.args.greater_is_better else np.less
   2768 if (
   2769     self.state.best_metric is None
   2770     or self.state.best_model_checkpoint is None
   2771     or operator(metric_value, self.state.best_metric)
   2772 ):

KeyError: 'eval_loss'