Expected `tensors` and `new_tensors` to have the same type but found <class 'tuple'> and <class 'torch.Tensor'>

I added code to read the first item of the tuple, which did not help, and the issue existed even before that change.

from nltk.translate.bleu_score import corpus_bleu
from transformers import EvalPrediction

def compute_metrics(eval_pred: EvalPrediction):
    predictions, label_ids = eval_pred.predictions, eval_pred.label_ids

    # Ensure predictions are a tensor (handle tuple case)
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # Ensure label_ids are a tensor (handle tuple case)
    if isinstance(label_ids, tuple):
        label_ids = label_ids[0]

    # Decode predictions and labels into token lists (corpus_bleu expects tokenized text)
    decoded_preds = [tokenizer.decode(pred, skip_special_tokens=True).split() for pred in predictions]
    decoded_labels = [[tokenizer.decode(label, skip_special_tokens=True).split()] for label in label_ids]

    # Calculate the corpus-level BLEU score
    bleu_score = corpus_bleu(decoded_labels, decoded_preds)
    return {"bleu": bleu_score}

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported


trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 1,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
        evaluation_strategy = "epoch", # Enable evaluation during training
    ),
    compute_metrics=compute_metrics,
)

Full error:

AssertionError                            Traceback (most recent call last)
in <cell line: 1>()
----> 1 trainer_stats = trainer.train()

/usr/local/lib/python3.10/dist-packages/unsloth/tokenizer_utils.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)

/usr/local/lib/python3.10/dist-packages/unsloth/models/llama.py in _fast_inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _maybe_log_save_evaluate(self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval, start_time)
   3047         metrics = None
   3048         if self.control.should_evaluate:
-> 3049             metrics = self._evaluate(trial, ignore_keys_for_eval)
   3050             is_new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial)
   3051

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in _evaluate(self, trial, ignore_keys_for_eval, skip_scheduler)
   3001
   3002     def _evaluate(self, trial, ignore_keys_for_eval, skip_scheduler=False):
-> 3003         metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
   3004         self._report_to_hp_search(trial, self.state.global_step, metrics)
   3005

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
   4048
   4049         eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-> 4050         output = eval_loop(
   4051             eval_dataloader,
   4052             description="Evaluation",

/usr/local/lib/python3.10/dist-packages/transformers/trainer.py in evaluation_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
   4269                 logits = self.gather_function((logits))
   4270                 if not self.args.batch_eval_metrics or description == "Prediction":
-> 4271                     all_preds.add(logits)
   4272             if labels is not None:
   4273                 labels = self.gather_function((labels))

/usr/local/lib/python3.10/dist-packages/transformers/trainer_pt_utils.py in add(self, tensors)
    320             self.tensors = tensors if self.do_nested_concat else [tensors]
    321         elif self.do_nested_concat:
--> 322             self.tensors = nested_concat(self.tensors, tensors, padding_index=self.padding_index)
    323         else:
    324             self.tensors.append(tensors)

/usr/local/lib/python3.10/dist-packages/transformers/trainer_pt_utils.py in nested_concat(tensors, new_tensors, padding_index)
    129     if not (isinstance(tensors, torch.Tensor) and isinstance(new_tensors, torch.Tensor)):
    130         assert (
--> 131             type(tensors) is type(new_tensors)
    132         ), f"Expected `tensors` and `new_tensors` to have the same type but found {type(tensors)} and {type(new_tensors)}."
    133     if isinstance(tensors, (list, tuple)):

AssertionError: Expected `tensors` and `new_tensors` to have the same type but found <class 'tuple'> and <class 'torch.Tensor'>.
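As far as I can tell from the last frame, the eval loop's accumulator ends up holding a tuple from one step and a bare tensor from another, and `nested_concat` refuses to mix the two types. A tiny standalone repro of just that assertion (this is only meant to illustrate the internal check, not my actual model outputs):

import torch
from transformers.trainer_pt_utils import nested_concat

first_batch = (torch.zeros(2, 4),)  # logits arrived wrapped in a tuple
second_batch = torch.zeros(2, 4)    # logits arrived as a plain tensor

# Raises the same AssertionError: the accumulator cannot concat a tuple with a tensor
nested_concat(first_batch, second_batch, padding_index=-100)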

There seems to be a problem like this that is difficult to solve with the DETR model, and I hope this isn't it…

Yeah, I'm not sure there is much I can do beyond manually changing the output type to match what the trainer expects, but I don't know how to do that or whether it would even work. I'm also not sure it's feasible to override the trainer's internal functions…
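The closest thing I have found so far is the `preprocess_logits_for_metrics` hook on the Trainer, which (if I understand it correctly) is applied to each batch's logits before they are accumulated, so it could unwrap the tuple there. Something like this is what I am considering, though I have not verified that it fixes the error:

import torch

def preprocess_logits_for_metrics(logits, labels):
    # Some models return a tuple (logits, extra outputs); keep only the logits
    if isinstance(logits, tuple):
        logits = logits[0]
    # Store predicted token IDs instead of full vocab-sized logits so the
    # eval loop always accumulates plain tensors (and uses far less memory)
    return logits.argmax(dim=-1)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_dataset,
    eval_dataset = val_dataset,
    # ... same dataset/args settings as above ...
    compute_metrics = compute_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics,
)

If that works, `eval_pred.predictions` in `compute_metrics` would then hold token IDs rather than raw logits, so they can be decoded directly; the labels may still contain `-100` padding, which `tokenizer.decode` can't handle, so those would presumably need to be replaced with `tokenizer.pad_token_id` before decoding.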
