RuntimeError when training: Expected floating point type for target with class probabilities, got Long

Hello everyone,
I am trying to fine-tune DistilBERT on a multiple-choice dataset (riddle_sense).

My code looks as follows:

import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, DistilBertForMultipleChoice, TrainingArguments, Trainer
from datasets import load_dataset
import evaluate
import numpy as np

from data_collator import DataCollatorForMultipleChoice


def tokenize(examples):
    # Replace each example's choice labels with the numeric indices 0..4.
    examples['labels'] = [to_number(choices) for choices in examples['labels']]

    # Repeat each question once per answer choice, then flatten both lists so the
    # tokenizer sees one (question, choice) pair per row.
    questions = [[question] * 5 for question in examples['question']]
    choices = [choices for choices in examples['text']]

    questions = sum(questions, [])
    choices = sum(choices, [])

    tokenized_examples = tokenizer(questions, choices, padding=True, truncation=True)
    # Regroup the flat encodings into chunks of 5, one chunk per original example.
    return {k: [v[i: i + 5] for i in range(0, len(v), 5)] for k, v in tokenized_examples.items()}


def to_number(choices):
    for i in range(5):
        choices[i] = i
    return choices


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)


checkpoint = 'distilbert-base-cased'
dataset = load_dataset('riddle_sense')
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

dataset = dataset.flatten()
dataset = dataset.rename_column('choices.label', 'labels')
dataset = dataset.rename_column('choices.text', 'text')
tokenized_dataset = dataset.map(tokenize, batched=True)

model = DistilBertForMultipleChoice.from_pretrained(checkpoint, num_labels=5)
accuracy = evaluate.load('accuracy')

training_args = TrainingArguments(
    output_dir='.\\finetuning\\training',
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=0.00005,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    push_to_hub=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForMultipleChoice(tokenizer),
    compute_metrics=compute_metrics
)

trainer.train()
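
For reference, this is roughly how I checked a single tokenized example right after the map call (just a quick sanity check, not part of the training script above), since I am not sure whether the labels column ends up in the format the model expects:

# Quick sanity check: inspect one tokenized training example.
example = tokenized_dataset['train'][0]
print(example.keys())
print(example['labels'])             # after to_number this is a list of 5 integers
print(len(example['input_ids']))     # number of choices per example (should be 5)
print(len(example['input_ids'][0]))  # token count of the first (question, choice) pair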

The DataCollatorForMultipleChoice looks as follows:

from dataclasses import dataclass
from transformers.tokenization_utils_base import PreTrainedTokenizerBase, PaddingStrategy
from typing import Optional, Union
import torch

@dataclass
class DataCollatorForMultipleChoice:
    """
    Data collator that dynamically pads the inputs received for multiple choice.
    """

    tokenizer: PreTrainedTokenizerBase
    padding: Union[bool, str, PaddingStrategy] = True
    max_length: Optional[int] = None
    pad_to_multiple_of: Optional[int] = None

    def __call__(self, features):
        label_name = 'label' if 'label' in features[0].keys() else 'labels'
        labels = [feature.pop(label_name) for feature in features]
        batch_size = len(features)
        num_choices = len(features[0]['input_ids'])

        # Flatten: one dict per (example, choice) pair so the tokenizer can pad them all together.
        flattened_features = [
            [{k: v[i] for k, v in feature.items()} for i in range(num_choices)] for feature in features
        ]
        flattened_features = sum(flattened_features, [])

        batch = self.tokenizer.pad(
            flattened_features,
            padding=self.padding,
            max_length=self.max_length,
            pad_to_multiple_of=self.pad_to_multiple_of,
            return_tensors='pt',
        )

        # Un-flatten back to (batch_size, num_choices, seq_len) and re-attach the labels.
        batch = {k: v.view(batch_size, num_choices, -1) for k, v in batch.items()}
        batch['labels'] = torch.tensor(labels, dtype=torch.int64)
        return batch

If I run the code, the dataset is loaded and tokenized seemingly without problems. However, as soon as fine-tuning starts, I get the following error:

  0%|          | 0/660 [00:00<?, ?it/s]You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
Traceback (most recent call last):
  File "PycharmProjects\finetune\finetuning.py", line 75, in <module>
    trainer.train()
  File "PycharmProjects\finetune\venv\lib\site-packages\transformers\trainer.py", line 1537, in train
    return inner_training_loop(
  File "PycharmProjects\finetune\venv\lib\site-packages\transformers\trainer.py", line 1854, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "PycharmProjects\finetune\venv\lib\site-packages\transformers\trainer.py", line 2728, in training_step
    loss = self.compute_loss(model, inputs)
  File "PycharmProjects\finetune\venv\lib\site-packages\transformers\trainer.py", line 2751, in compute_loss
    outputs = model(**inputs)
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "PycharmProjects\finetune\venv\lib\site-packages\transformers\models\distilbert\modeling_distilbert.py", line 1379, in forward
    loss = loss_fct(reshaped_logits, labels)
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\modules\module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\modules\module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\modules\loss.py", line 1179, in forward
    return F.cross_entropy(input, target, weight=self.weight,
  File "PycharmProjects\finetune\venv\lib\site-packages\torch\nn\functional.py", line 3053, in cross_entropy
    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
RuntimeError: Expected floating point type for target with class probabilities, got Long
  0%|          | 0/660 [00:21<?, ?it/s]
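
To narrow it down, I also built one batch by hand with the collator and printed the shapes and dtypes that reach the model. If I read the collator correctly, labels should come out here as a 2-D tensor of shape (4, 5) with dtype torch.int64, and I am not sure whether that is what DistilBertForMultipleChoice expects or whether it should be a single index per example:

# Build one small batch outside the Trainer and look at what the model receives.
features = [
    {k: tokenized_dataset['train'][i][k] for k in ('input_ids', 'attention_mask', 'labels')}
    for i in range(4)
]
batch = DataCollatorForMultipleChoice(tokenizer)(features)
for name, tensor in batch.items():
    print(name, tuple(tensor.shape), tensor.dtype)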

I tried to follow the NLP course as closely as possible, but I am not sure what I need to change to get this to work. Any help getting it to run would be much appreciated.