This is a duplicate of a question I posted on Stack Overflow that has so far gotten no answers (link to SO post).
Basically, I'm using the Trainer class to fine-tune a model on a text classification task, and the metrics I'm tracking take on the same values after every epoch; changing the learning rate and batch size does not seem to change anything.
This is the code I’m currently using:
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

def compute_metrics(eval_preds):
    logits, labels = eval_preds
    preds = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, preds)
    recall = recall_score(labels, preds, average='macro')
    precision = precision_score(labels, preds, average='macro')
    f1 = f1_score(labels, preds, average='macro')
    metrics = {
        'accuracy': acc,
        'macro_f1': f1,
        'macro_recall': recall,
        'macro_precision': precision
    }
    return metrics
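For what it's worth, the metric function itself behaves as expected when I call it in isolation; here is a quick sanity check with made-up logits and labels (hypothetical values, just for illustration):

# Hypothetical inputs, just to sanity-check compute_metrics in isolation
dummy_logits = np.array([[1.2, -0.3], [-0.8, 0.5], [0.1, 0.9]])
dummy_labels = np.array([0, 1, 0])
print(compute_metrics((dummy_logits, dummy_labels)))
# -> accuracy ≈ 0.667, macro_f1 ≈ 0.667, macro_recall = 0.75, macro_precision = 0.75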
import os
import torch
from transformers import (AutoModelForSequenceClassification, AutoTokenizer,
                          Trainer, TrainingArguments)

def fine_tune_transformer(model_ckpt, dataset, output_dir, max_len=256,
                          batch_size=16, epochs=3, learning_rate=1e-5, num_labels=2):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelForSequenceClassification.from_pretrained(model_ckpt, num_labels=num_labels).to(device)
    tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

    def tokenize(batch):
        return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=max_len)

    dataset_encoded = dataset.map(tokenize, batched=True, batch_size=None)
    os.makedirs(output_dir, exist_ok=True)

    training_args = TrainingArguments(output_dir=output_dir,
                                      num_train_epochs=epochs,
                                      learning_rate=learning_rate,
                                      per_device_train_batch_size=batch_size,
                                      per_device_eval_batch_size=batch_size,
                                      weight_decay=0.01,
                                      evaluation_strategy="epoch",
                                      disable_tqdm=False,
                                      push_to_hub=False,
                                      log_level="error")

    trainer = Trainer(model=model, args=training_args,
                      compute_metrics=compute_metrics,
                      train_dataset=dataset_encoded['train'],
                      eval_dataset=dataset_encoded['validation'],
                      tokenizer=tokenizer)
    return trainer
model_ckpt = 'xlm-roberta-base'
output_dir = 'content/drive/MyDrive/outputs'

# joint_dataset is a DatasetDict with 'train' and 'validation' splits
trainer = fine_tune_transformer(model_ckpt, joint_dataset, output_dir)
trainer.train()
Here is an example of the callback output showing the metrics after every epoch:

[screenshot: the logged accuracy, macro F1, macro recall, and macro precision are identical for every epoch]
I've also checked the logits after every epoch, and they are different; see the example below:
# epoch 1:
# [[ 1.4335482 -1.6775645 ]
# [-0.59078467 0.49235147]
# [ 1.3932185 -1.6217263 ]
# ...
# [ 1.5345982 -1.7932017 ]
# [ 1.6025882 -1.8546286 ]
# [-0.59078467 0.49235126]]
# epoch 2:
# [[ 1.5743057 -1.806836 ]
# [-0.5966272 0.5360965 ]
# [ 1.5249034 -1.7561252 ]
# ...
# [ 1.594467 -1.8229653 ]
# [ 1.5586028 -1.7579104 ]
# [-0.596627 0.53609663]]
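For reference, this is roughly how I pull the logits (a minimal sketch using trainer.predict on the evaluation set; not my exact instrumentation):

# trainer.predict returns a PredictionOutput; .predictions holds the raw logits
preds_output = trainer.predict(trainer.eval_dataset)
print(preds_output.predictions)  # shape: (n_eval_examples, num_labels)
# distribution of predicted classes, to check the model isn't collapsing to one label
print(np.unique(np.argmax(preds_output.predictions, axis=-1), return_counts=True))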
Please note that I've also implemented the same fine-tuning with a manual PyTorch training loop instead of the Trainer class, and there the training process works as expected, hence I believe I'm doing something wrong in my use of the Trainer class. I'm not pasting the full loop due to its length (let me know if needed and I'll edit my post!), but a condensed sketch is below.
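To make it concrete what I mean by the manual loop, here is a condensed sketch (not my actual code; it assumes model, device, and dataset_encoded from above are in scope and that the label column is named 'label'):

from torch.optim import AdamW
from torch.utils.data import DataLoader

# Condensed sketch of the manual training loop, with the assumptions noted above
dataset_encoded.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
train_loader = DataLoader(dataset_encoded['train'], batch_size=16, shuffle=True)
optimizer = AdamW(model.parameters(), lr=1e-5)

model.train()
for epoch in range(3):
    for batch in train_loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(input_ids=batch['input_ids'],
                        attention_mask=batch['attention_mask'],
                        labels=batch['label'])
        outputs.loss.backward()
        optimizer.step()
        optimizer.zero_grad()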