Strange behaviour while using custom Hyperparameter search

I see strange behaviour when I use a custom hyperparameter (HP) search-space function.
The results are identical across all trials and epochs.

Default example (no custom search space):

def compute_metrics(eval_preds):
    """Compute the weighted F1 score for one evaluation pass.

    ``eval_preds`` is unpacked into ``(logits, labels)``. The predicted class
    for each example is the argmax of the logits over the last axis, and the
    result of the "f1" metric's ``compute`` call is returned unchanged.
    """
    # The metric is loaded on every call, before the inputs are unpacked.
    f1_metric = load_metric("f1")
    logits, labels = eval_preds
    predicted_ids = np.argmax(logits, axis=-1)
    return f1_metric.compute(
        predictions=predicted_ids,
        references=labels,
        average="weighted",
    )

# Train on a single shard (index 1 of 10) of the tokenized training data.
train_dataset = tokenized_train["train"].shard(index=1, num_shards=10)

# Baseline training configuration: evaluate and checkpoint once per epoch,
# and reload the checkpoint with the best "f1" when training finishes.
# NOTE(review): MODEL_NAME is passed positionally, presumably as the output
# directory; confirm against the TrainingArguments signature.
args = TrainingArguments(
    MODEL_NAME,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=TR_BATCH_SIZE,
    per_device_eval_batch_size=TEST_BATCH_SIZE,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    push_to_hub=False,
)

# model_init (rather than a model instance) is handed to the Trainer, which
# is what hyperparameter_search is then called on.
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=tokenized_test["train"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# No hp_space is passed here, so the library's default search space is used.
best_run = trainer.hyperparameter_search(n_trials=10, direction="maximize")

The results are:
image

But when I use a custom search space:

def my_hp_space(trial):
    """Build the custom hyperparameter search space for a single trial.

    Every value is requested from ``trial`` through its ``suggest_*``
    methods, in the order learning rate, epoch count, seed, batch size.

    Returns:
        A dict mapping each hyperparameter name to the value suggested
        for this trial.
    """
    # NOTE(review): this log-scaled range (1e-4 .. 1e-2) lies entirely above
    # the 2e-5 learning rate used in the default run; confirm it is intended.
    lr = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
    epochs = trial.suggest_int("num_train_epochs", 1, 3)
    seed = trial.suggest_int("seed", 1, 40)
    batch_size = trial.suggest_categorical(
        "per_device_train_batch_size", [1, 2, 4, 6, 8]
    )

    return {
        "learning_rate": lr,
        "num_train_epochs": epochs,
        "seed": seed,
        "per_device_train_batch_size": batch_size,
    }
# Re-run the search with the custom space; n_trials is not passed and the
# return value is not kept.
trainer.hyperparameter_search(
    hp_space=my_hp_space,
    direction="maximize",
)

image