I see strange behaviour when I use a custom hyperparameter-space (HP) function:
the results are identical across all trials and epochs.
Default example:
def compute_metrics(eval_preds):
    """Compute weighted F1 for a Trainer evaluation batch.

    Args:
        eval_preds: (logits, labels) pair as handed over by the Trainer;
            logits is presumably (batch, num_classes) — TODO confirm.

    Returns:
        dict with an "f1" key, as produced by the datasets metric.
    """
    logits, labels = eval_preds
    # Class prediction = argmax over the last (class) axis.
    predictions = np.argmax(logits, axis=-1)
    # Loaded on every call; acceptable at per-epoch eval frequency, but could
    # be hoisted to module level if evaluation becomes frequent.
    metric = load_metric("f1")
    return metric.compute(predictions=predictions, references=labels, average='weighted')
# Training configuration collected in one mapping, then unpacked into
# TrainingArguments — identical settings to passing them as keyword args.
training_config = {
    "evaluation_strategy": "epoch",  # evaluate at the end of every epoch
    "save_strategy": "epoch",        # must match eval cadence for load_best_model_at_end
    "learning_rate": 2e-5,
    "per_device_train_batch_size": TR_BATCH_SIZE,
    "per_device_eval_batch_size": TEST_BATCH_SIZE,
    "num_train_epochs": 5,
    "weight_decay": 0.01,
    "load_best_model_at_end": True,
    "metric_for_best_model": 'f1',   # key returned by compute_metrics
    "push_to_hub": False,
}
args = TrainingArguments(MODEL_NAME, **training_config)
# Search on a 1/10th shard of the training split to keep trials fast.
train_dataset = tokenized_train["train"].shard(index=1, num_shards=10)

# Trainer is built from model_init (not a fixed model) so each trial /
# each hyperparameter set starts from a freshly initialised model.
trainer_kwargs = dict(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=tokenized_test['train'],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_kwargs)

# Default search space, 10 trials, maximising the objective.
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")
The results are:
but when I use the custom search space:
def my_hp_space(trial):
    """Optuna search space for Trainer.hyperparameter_search.

    Args:
        trial: an optuna Trial used to draw the candidate values.

    Returns:
        dict of hyperparameter names to sampled values, matching
        TrainingArguments field names.
    """
    # NOTE(review): 1e-4..1e-2 is well above typical transformer fine-tuning
    # rates (~1e-5..5e-5); rates this high can make every trial collapse to
    # the same degenerate score — confirm whether this range is intended.
    lr = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    epochs = trial.suggest_int("num_train_epochs", 1, 3)
    seed = trial.suggest_int("seed", 1, 40)
    batch_size = trial.suggest_categorical("per_device_train_batch_size", [1, 2, 4, 6, 8])
    return {
        "learning_rate": lr,
        "num_train_epochs": epochs,
        "seed": seed,
        "per_device_train_batch_size": batch_size,
    }

trainer.hyperparameter_search(direction="maximize", hp_space=my_hp_space)