Multi-class classification error when fine-tuning via the Trainer API

I'm attempting to fine-tune DistilBERT for multi-class classification using the following code:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding
from datasets import load_dataset
import evaluate
from tqdm.notebook import tqdm
import numpy as np

import os

dataset = load_dataset("csv", data_files="datafile.csv", keep_default_na=False)
dataset["train"] = dataset["train"].rename_column("label", "labels")
dataset = dataset["train"].train_test_split(test_size=0.3)
dataset["train"] = dataset["train"].shuffle()
dataset["test"] = dataset["test"].shuffle()
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True)

model_id = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_id, max_length=512, model_max_length=512)
tokenized_dataset = dataset.map(preprocess_function, batched=True)
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

id2label = {0: "casual", 1: "possibly_needs_caution", 2: "probably_needs_caution", 3: "needs_caution", 4: "needs_intervention"}
label2id = {"casual": 0, "possibly_needs_caution": 1, "probably_needs_caution": 2, "needs_caution": 3, "needs_intervention": 4}

model = AutoModelForSequenceClassification.from_pretrained(
    model_id, num_labels=5, id2label=id2label, label2id=label2id, problem_type="multi_class_classification"
)

tokenized_dataset["train"] = tokenized_dataset["train"].remove_columns(["text", "Unnamed: 0"])
tokenized_dataset["test"] = tokenized_dataset["test"].remove_columns(["text", "Unnamed: 0"])

training_args = TrainingArguments(
    output_dir="safety_instruct_detect",
    learning_rate=4.5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # fp16=True,
    warmup_steps=1000,
    num_train_epochs=2,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    push_to_hub=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()

I get the following error:

ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are labels,input_ids,attention_mask.
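
As a sanity check, the loss computation can be exercised outside the Trainer by running one collated batch through the model by hand (a minimal sketch using the objects defined above; the two-example batch is arbitrary):

batch = data_collator([tokenized_dataset["train"][i] for i in range(2)])  # collate two training examples, labels included
outputs = model(**batch)  # forward pass with input_ids, attention_mask, and labels
print(outputs.loss)  # a tensor means the loss branch ran; None matches the Trainer error

If outputs.loss comes back as None here, that matches what the Trainer is complaining about.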

Here are a few things I've tried:

  • Using BertForSequenceClassification rather than AutoModelForSequenceClassification

  • Making the labels in the dataset a float rather than an int (sketched after this list)

  • Using both "label" and "labels" as the label column name
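
For the second bullet, this is the kind of cast I mean (a sketch using the datasets library's cast_column and Value; "labels" is the column name after the rename above):

from datasets import Value

tokenized_dataset["train"] = tokenized_dataset["train"].cast_column("labels", Value("float32"))  # int labels -> float
tokenized_dataset["test"] = tokenized_dataset["test"].cast_column("labels", Value("float32"))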