Hi!
I'm fine-tuning a model on my own dataset, but after hyperparameter optimization with Optuna I get worse macro F1 and accuracy than before. What am I doing wrong? Please find my code below:
import optuna
from transformers import (
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
)

LEARNING_RATE_HIGH = 0.01
LEARNING_RATE_LOW = 4e-5
MAX_EPOCHS = 5
MIN_EPOCHS = 2
NUM_TRIALS = 25
PER_DEVICE_EVAL_BATCH = 8
PER_DEVICE_TRAIN_BATCH = 8
WEIGHT_DECAY_HIGH = 0.01
WEIGHT_DECAY_LOW = 4e-5
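For context, the tokenizer, data collator, and tokenized dataset used below are built roughly like this (a sketch, not my exact code; raw_dataset and the "text" column are placeholders for my actual data):

from transformers import AutoTokenizer, DataCollatorWithPadding

tokenizer = AutoTokenizer.from_pretrained(trained_model_path)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
# raw_dataset is a DatasetDict with "train" and "eval" splits (placeholder name).
tokenized_dataset = raw_dataset.map(
    lambda batch: tokenizer(batch["text"], truncation=True),
    batched=True,
)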
# I previously trained cardiffnlp/twitter-xlm-roberta-base-sentiment on my
# dataset and saved the resulting checkpoint at trained_model_path.
def objective(trial: optuna.Trial):
    # Reload the saved checkpoint fresh for each trial.
    model = AutoModelForSequenceClassification.from_pretrained(
        trained_model_path
    )
    training_arguments = TrainingArguments(
        learning_rate=trial.suggest_float(
            "learning_rate",
            LEARNING_RATE_LOW,
            LEARNING_RATE_HIGH,
            log=True,
        ),
        num_train_epochs=trial.suggest_int(
            "num_train_epochs",
            MIN_EPOCHS,
            MAX_EPOCHS,
        ),
        output_dir="hyperparameter_optimization",
        per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
        per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH,
        weight_decay=trial.suggest_float(
            "weight_decay",
            WEIGHT_DECAY_LOW,
            WEIGHT_DECAY_HIGH,
            log=True,
        ),
    )
    trainer = Trainer(
        args=training_arguments,
        data_collator=data_collator,
        eval_dataset=tokenized_dataset["eval"],
        model=model,
        tokenizer=tokenizer,
        train_dataset=tokenized_dataset["train"],
    )
    result = trainer.train()
    # Each trial is scored by its final training loss.
    return result.training_loss
study = optuna.create_study(
    direction="minimize",
    study_name=model_name + "-hyperparameter-optimization",
)
study.optimize(func=objective, n_trials=NUM_TRIALS)
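Once the study finishes, I read off the best trial with Optuna's standard accessors:

print(study.best_value)   # lowest objective value (final training loss)
print(study.best_params)  # hyperparameters of that trial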
# Retrain from the same checkpoint with the best hyperparameters found.
model = AutoModelForSequenceClassification.from_pretrained(
    trained_model_path
)
training_arguments = TrainingArguments(
    learning_rate=study.best_params["learning_rate"],
    num_train_epochs=study.best_params["num_train_epochs"],
    output_dir="best_parameters",
    per_device_eval_batch_size=PER_DEVICE_EVAL_BATCH,
    per_device_train_batch_size=PER_DEVICE_TRAIN_BATCH,
    weight_decay=study.best_params["weight_decay"],
)
trainer = Trainer(
    args=training_arguments,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    eval_dataset=tokenized_dataset["eval"],
    model=model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset["train"],
)
trainer.train()
# Macro F1 and accuracy are reported here via compute_metrics.
trainer.evaluate()
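And this is roughly what my compute_metrics looks like (a minimal sketch; I compute accuracy and macro F1 with scikit-learn):

import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(eval_pred):
    # The Trainer passes (logits, labels) for the eval split.
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predictions),
        "macro_f1": f1_score(labels, predictions, average="macro"),
    }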