Getting the same value for all evaluation metrics

I’m working on fine-tuning the LiLT model on a custom dataset whose labels aren’t exactly in IOB format. When using seqeval I get an error telling me the tags aren’t NER tags, so I switched to loading each metric separately. However, after training the model with the Trainer I keep getting the same value for accuracy, F1, precision, and recall. I don’t understand why, especially since the code I have been running worked before:
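
For context, my label set has no B-/I- prefixes, which is what seqeval complains about. A hypothetical stand-in for it (the real names are different) would be:

# Hypothetical example of my custom, non-IOB label set (the real label_list has different names)
label_list = ["HEADER", "QUESTION", "ANSWER", "OTHER"]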

import evaluate

# metric = evaluate.load("seqeval")  # raises an error because my tags aren't NER tags
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")
accuracy_metric = evaluate.load("accuracy")

import numpy as np

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=2)

    # Remove ignored positions (special tokens, label == -100).
    # Keep the integer class ids here: evaluate's precision/recall/f1/accuracy
    # expect class ids, whereas the names in label_list are only needed for seqeval.
    true_predictions = [
        [p for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [l for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]

    # Flatten the per-example lists into one token-level list
    true_predictions = [item for sublist in true_predictions for item in sublist]
    true_labels = [item for sublist in true_labels for item in sublist]

    # Each compute() call returns a single-key dict, so merge them into one dict
    results = {}
    results.update(precision_metric.compute(predictions=true_predictions, references=true_labels, average="micro"))
    results.update(recall_metric.compute(predictions=true_predictions, references=true_labels, average="micro"))
    results.update(f1_metric.compute(predictions=true_predictions, references=true_labels, average="micro"))
    results.update(accuracy_metric.compute(predictions=true_predictions, references=true_labels))

    return results
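
For reference, this is a minimal way to sanity-check compute_metrics outside the Trainer, using made-up logits and labels (dummy values, not my real data):

# Dummy batch: 1 example, 3 tokens, 3 classes; -100 marks an ignored position
dummy_logits = np.array([[[2.0, 0.1, 0.1],
                          [0.1, 2.0, 0.1],
                          [0.1, 0.1, 2.0]]])
dummy_labels = np.array([[0, 1, -100]])
print(compute_metrics((dummy_logits, dummy_labels)))
# e.g. {'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'accuracy': 1.0}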


from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="test",
                                  hub_model_id=hub_model_id,
                                  num_train_epochs=5,
                                  learning_rate=2e-5,
                                  evaluation_strategy="steps",
                                  eval_steps=5,
                                  load_best_model_at_end=True,
                                  metric_for_best_model="f1")


from transformers.data.data_collator import default_data_collator

class CustomTrainer(Trainer):
    # Return the dataloaders I built earlier instead of letting the Trainer
    # construct its own from train_dataset / eval_dataset
    def get_train_dataloader(self):
        return train_dataloader

    def get_eval_dataloader(self, eval_dataset=None):
        return eval_dataloader

# Initialize our Trainer
trainer = CustomTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
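
Training is then launched as usual:

trainer.train()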

This is a portion of the fine-tuning results I was getting:
[Screenshot 2024-07-21 144843: evaluation metrics logged during training]