Using hyperparameter-search in Trainer

FYI, this has been merged in master. Here is an example of use:

from nlp import load_dataset, load_metric
from transformers import AutoModelForSequenceClassification, AutoTokenizer, DataCollatorWithPadding, Trainer, TrainingArguments

tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
dataset = load_dataset('glue', 'mrpc')
metric = load_metric('glue', 'mrpc')

def encode(examples):
    outputs = tokenizer(examples['sentence1'], examples['sentence2'], truncation=True)
    return outputs

encoded_dataset = dataset.map(encode, batched=True)
# Won't be necessary when this PR is merged with master since the Trainer will do it automatically
encoded_dataset.set_format(columns=['attention_mask', 'input_ids', 'token_type_ids', 'label'])

def model_init():
    return AutoModelForSequenceClassification.from_pretrained('bert-base-cased', return_dict=True)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions.argmax(axis=-1)
    return metric.compute(predictions=predictions, references=labels)

# Evaluate during training and a bit more often than the default to be able to prune bad trials early.
# Disabling tqdm is a matter of preference.
training_args = TrainingArguments("test", evaluate_during_training=True, eval_steps=500, disable_tqdm=True)
trainer = Trainer(
    args=training_args,
    data_collator=DataCollatorWithPadding(tokenizer),
    train_dataset=encoded_dataset["train"], 
    eval_dataset=encoded_dataset["validation"], 
    model_init=model_init,
    compute_metrics=compute_metrics,
)

# Default objective is the sum of all metrics when metrics are provided, so we have to maximize it.
trainer.hyperparameter_search(direction="maximize")

This will use optuna or Ray Tune, depending on which you have installed. If you have both, it will use optuna by default, but you can pass backend="ray" to use Ray Tune instead. Note that you need to install nlp from source for the example to work.
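The call returns a BestRun object with the run id, the objective value reached, and the winning hyperparameters, and n_trials lets you cap the number of trials. A minimal sketch of a typical pattern (retraining simply copies the best hyperparameters back onto the TrainingArguments, nothing more clever than that):

# Run the search, then retrain with the best hyperparameters found.
best_run = trainer.hyperparameter_search(direction="maximize", n_trials=10)
print(best_run.objective, best_run.hyperparameters)

# Copy the winning values back onto the TrainingArguments and train again
# (model_init ensures the model is re-initialized from scratch).
for name, value in best_run.hyperparameters.items():
    setattr(trainer.args, name, value)
trainer.train()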

To customize the hyperparameter search space, you can pass a function as hp_space to this call. Here is an example if you want to search over higher learning rates than the default, with optuna:

def my_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True),
        "num_train_epochs": trial.suggest_int("num_train_epochs", 1, 5),
        "seed": trial.suggest_int("seed", 1, 40),
        "per_device_train_batch_size": trial.suggest_categorical("per_device_train_batch_size", [4, 8, 16, 32, 64]),
    }

trainer.hyperparameter_search(direction="maximize", hp_space=my_hp_space)

And here is the same search space written for Ray Tune:

def my_hp_space_ray(trial):
    from ray import tune

    return {
        "learning_rate": tune.loguniform(1e-4, 1e-2),
        "num_train_epochs": tune.choice(range(1, 6)),
        "seed": tune.choice(range(1, 41)),
        "per_device_train_batch_size": tune.choice([4, 8, 16, 32, 64]),
    }

trainer.hyperparameter_search(direction="maximize", hp_space=my_hp_space_ray, backend="ray")
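Any extra keyword arguments passed to hyperparameter_search should be forwarded to the backend (optuna.create_study or ray.tune.run), though the exact forwarding may vary by version. Assuming that holds, you can for example control per-trial resources with Ray like this:

# Assumption: extra kwargs are forwarded to ray.tune.run, so backend-specific
# options such as resources_per_trial can be passed straight through.
trainer.hyperparameter_search(
    direction="maximize",
    hp_space=my_hp_space_ray,
    backend="ray",
    n_trials=10,
    resources_per_trial={"cpu": 1, "gpu": 1},
)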

If you want to customize the objective to minimize/maximize, pass along a function to compute_objective:

def my_objective(metrics):
    # Your elaborate computation here
    return result_to_optimize

trainer.hyperparameter_search(direction="maximize", compute_objective=my_objective)
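For instance, to optimize only the F1 score from the MRPC metric above: the Trainer prefixes metric names with "eval_", so the key is assumed to be "eval_f1" here (double-check the keys in your own metrics dict):

# Sketch: maximize only the F1 score reported by the MRPC metric.
# "eval_f1" is assumed to be the prefixed key produced during evaluation.
def my_objective(metrics):
    return metrics["eval_f1"]

trainer.hyperparameter_search(direction="maximize", compute_objective=my_objective)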