I’m fine-tuning the LiLT model on a custom dataset where the labels aren’t in IOB format. When I use seqeval I get an error telling me the tags aren’t NER tags, so I switched to loading each metric separately. However, after training with Trainer() I keep getting the exact same value for accuracy, F1, precision and recall, and I don’t understand why, especially since this code worked for me before:
import evaluate
# metric = evaluate.load("seqeval")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")
accuracy_metric = evaluate.load("accuracy")
import numpy as np
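(For context, label_list is just the list of plain class names in my dataset, with no B-/I- prefixes; the names below are placeholders, not my real labels.)

# Placeholder, non-IOB label names standing in for my real classes
label_list = ["other", "header", "question", "answer"]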
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    # logits have shape (num_examples, seq_len, num_labels) -> pick the best label per token
    predictions = np.argmax(predictions, axis=2)
    # Remove ignored index (special tokens / padding labelled -100)
    true_predictions = [
        [label_list[p] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    true_labels = [
        [label_list[l] for (p, l) in zip(prediction, label) if l != -100]
        for prediction, label in zip(predictions, labels)
    ]
    # Flatten to single token-level lists, since these metrics expect flat class labels
    true_predictions = [item for sublist in true_predictions for item in sublist]
    true_labels = [item for sublist in true_labels for item in sublist]
    results = {
        "precision": precision_metric.compute(predictions=true_predictions, references=true_labels, average="micro")["precision"],
        "recall": recall_metric.compute(predictions=true_predictions, references=true_labels, average="micro")["recall"],
        "f1": f1_metric.compute(predictions=true_predictions, references=true_labels, average="micro")["f1"],
        "accuracy": accuracy_metric.compute(predictions=true_predictions, references=true_labels)["accuracy"],
    }
    return results
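(Each evaluate metric's compute() returns a dict keyed by the metric name, which is why I pull the values out by key above; quick sanity check with toy integer inputs, not my data:)

import evaluate
print(evaluate.load("precision").compute(predictions=[0, 1, 1, 2], references=[0, 1, 2, 2], average="micro"))
# expected: {'precision': 0.75}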
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(output_dir="test",
hub_model_id=hub_model_id,
num_train_epochs=5,
learning_rate=2e-5,
evaluation_strategy="steps",
eval_steps=5,
load_best_model_at_end=True,
metric_for_best_model="f1")
from transformers.data.data_collator import default_data_collator
class CustomTrainer(Trainer):
    # Return the dataloaders built earlier instead of letting Trainer create its own
    def get_train_dataloader(self):
        return train_dataloader

    def get_eval_dataloader(self, eval_dataset=None):
        return eval_dataloader
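(train_dataloader and eval_dataloader are plain PyTorch DataLoaders I build earlier over the already-encoded datasets, roughly like this; the batch size is a placeholder:)

from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True)  # placeholder batch size
eval_dataloader = DataLoader(eval_dataset, batch_size=2)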
# Initialize our Trainer
trainer = CustomTrainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
tokenizer=tokenizer,
compute_metrics=compute_metrics,
)
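To take the Trainer out of the picture, here is a minimal standalone version of the four metric calls with toy integer inputs instead of my real labels; with average="micro" all four numbers come out identical (4/6 in this toy case), exactly like in my training logs:

import evaluate

preds = [0, 2, 1, 2, 0, 1]
refs = [0, 1, 1, 2, 0, 2]

precision = evaluate.load("precision").compute(predictions=preds, references=refs, average="micro")
recall = evaluate.load("recall").compute(predictions=preds, references=refs, average="micro")
f1 = evaluate.load("f1").compute(predictions=preds, references=refs, average="micro")
accuracy = evaluate.load("accuracy").compute(predictions=preds, references=refs)

print(precision, recall, f1, accuracy)
# all four values are 0.666..., just like the identical numbers I see during training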