BertForSequenceClassification - ValueError: Target size (torch.Size([32])) must be the same as input size (torch.Size([32, 35]))

Hi,

I’m currently building a multiclass classifier with BERT and Hugging Face’s Trainer. There are 35 labels to classify, and I simply convert each string label to an integer label (for example, topic 1 = label 1). The tutorial I followed says that for multiclass classification plain numerical labels are enough and no one-hot encoding is needed, so why am I encountering this shape error? Would love your feedback on this.
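For context, this is roughly how I build the integer labels (a minimal sketch; the `topic` column name and the `topic_to_id` dict are illustrative, my real code does the equivalent):

```
# Illustrative sketch: map each of the 35 topic strings to an integer id
# ('topic' is a placeholder column name)
topic_to_id = {topic: i for i, topic in enumerate(sorted(df['topic'].unique()))}
df['label'] = df['topic'].map(topic_to_id)  # integer ids 0..34
```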

```
import torch
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from transformers import (BertTokenizer, BertForSequenceClassification,
                          Trainer, TrainingArguments)

num_labels = 35   # 35 topic classes
max_len = 128     # example value

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', truncation=True, do_lower_case=True)
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels)
```
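
Just to rule out a setup problem, I checked that the classification head matches the 35 labels (a quick inspection, not part of the pipeline):

```
# Sanity check: the classification head should output 35 logits
print(model.config.num_labels)        # 35
print(model.classifier.out_features)  # 35
```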


```
class CustomDataset(Dataset):
    def __init__(self, df, tokenizer, max_len):
        self.dataframe = df
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.text = df['text']
        self.label = df['label']

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        text = str(self.text[index])
        # Tokenize a single example, padded/truncated to max_len
        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        label = self.label[index]

        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.float)
        }
```


```
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
train_df = train_df.reset_index(drop=True)
val_df = val_df.reset_index(drop=True)

train_dataset = CustomDataset(train_df, tokenizer, max_len)
val_dataset = CustomDataset(val_df, tokenizer, max_len)
```
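
A quick look at what a single item of the dataset returns (sanity check only):

```
sample = train_dataset[0]
print(sample['input_ids'].shape)   # torch.Size([max_len])
print(sample['labels'].dtype)      # torch.float32, given the cast above
print(sample['labels'])            # a single scalar label
```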

```
#==================================================================
# TRAINING MODEL
#==================================================================

def compute_metrics(eval_pred):
    labels = eval_pred.label_ids
    preds = eval_pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average='weighted')
    return {'f1': f1}


class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get("logits")
        labels = inputs.get("labels")
        criterion = torch.nn.CrossEntropyLoss()
        loss = criterion(logits, labels)
        return (loss, outputs) if return_outputs else loss
```
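
For reference, this is the shape/dtype combination I understood `CrossEntropyLoss` to expect for multiclass targets (a standalone check, independent of my data):

```
# CrossEntropyLoss with integer class indices: logits [batch, num_classes],
# targets [batch] of dtype long
logits = torch.randn(32, 35)
labels = torch.randint(0, 35, (32,))
loss = torch.nn.CrossEntropyLoss()(logits, labels)
print(loss)  # works without any one-hot encoding
```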


```
batch_size = 32
logging_steps = len(train_df) // batch_size

training_args = TrainingArguments(
    output_dir='test_output',
    overwrite_output_dir=True,
    num_train_epochs=10,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    warmup_steps=50,
    learning_rate=0.01,
    weight_decay=0.04,
    logging_steps=logging_steps,
    evaluation_strategy='epoch',
    eval_steps=10,
)

trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()
```