Why do my attempts to optimize hyperparameters with ray tune keep showing a 'trial error?

Hello Huggingface Community :hugs:! I’ve fine-tuned deberta with my own data and would like to do some hyperparameter optimization. I’m trying to use ray tune but it keeps showing a trial error in all the trials that appear. Can anybody help me, please?

I am not able to fix or find a standard ray implementation solution that uses the trainer.hyperparameter_search() yet.

My code:

class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels=None):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        if self.labels:
            item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.encodings["input_ids"])    

def compute_metrics(p):
    print(type(p))
    pred, labels = p
    pred = np.argmax(pred, axis=1)

    accuracy = accuracy_score(y_true=labels, y_pred=pred, average='micro')
    recall = recall_score(y_true=labels, y_pred=pred, average='micro')
    precision = precision_score(y_true=labels, y_pred=pred, average='micro')
    f1 = f1_score(y_true=labels, y_pred=pred, average='micro')

    return {"accuracy": accuracy, "precision": precision, "recall": recall, "f1": f1}


def load_dataset(dataset_file, dataset_size):
  data = pd.read_csv('/content/postagged.csv')
  data = data[:dataset_size]
  data['label'] = data['label'].astype(int)
  data['premise'] = data['premise'].astype(str)
  data['hypothesis'] = data['hypothesis'].astype(str)
  return data


def load_model(model_hf):
  model_hf = 'microsoft/deberta-v3-base'
  bert_model = AutoModelForSequenceClassification.from_pretrained(model_hf, num_labels=3) 
  tokenizer = AutoTokenizer.from_pretrained(model_hf)
  return bert_model, tokenizer


def prepare_dataset(dataset_file, dataset_size, tokenizer):
  df = load_dataset(dataset_file, dataset_size)
  
  training_size = int(dataset_size * 0.8)
  #val_size = dataset_size - training_size
  
  premise = list(df['premise'])
  hypothesis = list(df['hypothesis'])
  y = list(df['label'])

  X_train_tokenized = tokenizer(premise[0:training_size], hypothesis[0:training_size], padding=True, truncation=True, max_length=256)
  y_train = y[0:training_size]

  X_val_tokenized = tokenizer(premise[training_size:dataset_size], hypothesis[training_size:dataset_size], padding=True, truncation=True, max_length=256)
  y_val = y[training_size:dataset_size]

  train_dataset = Dataset(X_train_tokenized, y_train)
  val_dataset = Dataset(X_val_tokenized, y_val)
  return train_dataset, val_dataset


def ray_hp_space(trial):
    return {
        "learning_rate": tune.loguniform(1e-6, 1e-4),
        "per_device_train_batch_size": tune.choice([16, 32, 64, 128]),
    }


def model_init(trial):
  return AutoModelForSequenceClassification.from_pretrained(
      model_hf,
      num_labels=3
  )   


def train_model(per_device_train_batch_size, model, train_dataset, val_dataset, compute_metrics, model_name):
  args = TrainingArguments(
      output_dir= "/content/driveTraining/",
      do_train=True,
      do_eval=True,
      evaluation_strategy="epoch",
      save_strategy="epoch",
      load_best_model_at_end=True,
      seed=42
  )

  trainer = Trainer(
      model=model,
      model_init=model_init,
      train_dataset=train_dataset,
      eval_dataset=val_dataset,
      compute_metrics=compute_metrics
      
  )
  reporter = CLIReporter(
          parameter_columns={
              "learning_rate": "lr",
              "warmup_steps": "warmup_steps",
              "weight_decay": "w_decay"
          },
          metric_columns=["eval_acc", "eval_loss", "epoch", "eval_f1"],
)
  best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend="ray",
    progress_reporter=reporter,
    hp_space=ray_hp_space
)

model, tokenizer = load_model(model_hf)
train_dataset, val_dataset = prepare_dataset(dataset_file, dataset_size, tokenizer)
train_model(per_device_train_batch_size, model, train_dataset, val_dataset, compute_metrics, model_name)

Error:

TuneError                                 Traceback (most recent call last)
<ipython-input-14-d666714d0178> in <module>
    168 model, tokenizer = load_model(model_hf)
    169 train_dataset, val_dataset = prepare_dataset(dataset_file, dataset_size, tokenizer)
--> 170 train_model(per_device_train_batch_size, model, train_dataset, val_dataset, compute_metrics, model_name)
    171 #test_model(model_name, tokenizer)

3 frames
<ipython-input-14-d666714d0178> in train_model(per_device_train_batch_size, model, train_dataset, val_dataset, compute_metrics, model_name)
    118           metric_columns=["eval_acc", "eval_loss", "epoch", "eval_f1"],
    119 )
--> 120   best_trial = trainer.hyperparameter_search(
    121     direction="maximize",
    122     backend="ray",

/usr/local/lib/python3.9/dist-packages/transformers/trainer.py in hyperparameter_search(self, hp_space, compute_objective, n_trials, direction, backend, hp_name, **kwargs)
   2536             HPSearchBackend.WANDB: run_hp_search_wandb,
   2537         }
-> 2538         best_run = backend_dict[backend](self, n_trials, direction, **kwargs)
   2539 
   2540         self.hp_search_backend = None

/usr/local/lib/python3.9/dist-packages/transformers/integrations.py in run_hp_search_ray(trainer, n_trials, direction, **kwargs)
    340         dynamic_modules_import_trainable.__mixins__ = trainable.__mixins__
    341 
--> 342     analysis = ray.tune.run(
    343         dynamic_modules_import_trainable,
    344         config=trainer.hp_space(None),

/usr/local/lib/python3.9/dist-packages/ray/tune/tune.py in run(run_or_experiment, name, metric, mode, stop, time_budget_s, config, resources_per_trial, num_samples, local_dir, search_alg, scheduler, keep_checkpoints_num, checkpoint_score_attr, checkpoint_freq, checkpoint_at_end, verbose, progress_reporter, log_to_file, trial_name_creator, trial_dirname_creator, chdir_to_trial_dir, sync_config, export_formats, max_failures, fail_fast, restore, server_port, resume, reuse_actors, raise_on_failed_trial, callbacks, max_concurrent_trials, trial_executor, _experiment_checkpoint_dir, _remote, _remote_string_queue)
    790     if incomplete_trials:
    791         if raise_on_failed_trial and not experiment_interrupted_event.is_set():
--> 792             raise TuneError("Trials did not complete", incomplete_trials)
    793         else:
    794             logger.error("Trials did not complete: %s", incomplete_trials)

TuneError: ('Trials did not complete', [_objective_1091e_00000, _objective_1091e_00001, _objective_1091e_00002, _objective_1091e_00003, _objective_1091e_00004, _objective_1091e_00005, _objective_1091e_00006, _objective_1091e_00007, _objective_1091e_00008, _objective_1091e_00009, _objective_1091e_00010, _objective_1091e_00011, _objective_1091e_00012, _objective_1091e_00013, _objective_1091e_00014, _objective_1091e_00015, _objective_1091e_00016, _objective_1091e_00017, _objective_1091e_00018, _objective_1091e_00019])