Dimension mismatch when training BART with Trainer

Hi all,

I encountered a ValueError when training the facebook/bart-base Transformer with a sequence classification head.
It seems that the dimensions of the predictions are different to e.g. the bart-base-uncased model for sequence classification.

I am using transformers version 4.6.1.

Here is an example script which you could copy paste to reproduce the error. I use my own toy data and only use a subset of it so that the script only runs for some minutes.
Note that when I replace "facebook/bart-base" with "bert-base-uncased" for model_name down below, the script executes successfully.

Do you know what I am doing wrong?

import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import TrainingArguments, Trainer, EvalPrediction, AutoConfig, AutoTokenizer, AutoModelForSequenceClassification

filepath = "https://raw.githubusercontent.com/DavidPfl/thesis_ds/main/data/archiv/suttner_data.tsv"
df_dataset = pd.read_csv(filepath, sep = "\t", header = None)
df_dataset["text"] = df_dataset.iloc[:,4] + df_dataset.iloc[:,5]
articles = df_dataset["text"].tolist()
labels = df_dataset.iloc[:,1].astype(int).tolist()
train_articles, test_articles, train_labels, test_labels = train_test_split(articles, labels, stratify=labels)

class HyperpartisanDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)


def compute_accuracy(p: EvalPrediction):   
    preds = np.argmax(p.predictions, axis=1)
    return {"acc": (preds == p.label_ids).mean()}
    
model_name = "facebook/bart-base" # change this line to "bert-base-uncased" and the script executes successfully!
config = AutoConfig.from_pretrained(model_name, num_labels = 2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_encodings = tokenizer(train_articles[:3], truncation=True, padding=True)
test_encodings = tokenizer(test_articles[:3], truncation=True, padding=True)
train_dataset = HyperpartisanDataset(train_encodings, train_labels[:3])
eval_dataset = HyperpartisanDataset(test_encodings, test_labels[:3])

model =  AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=config,
)

training_args = TrainingArguments(
    output_dir="./test",
    do_train=True,
    do_eval=True,
    evaluation_strategy="epoch",
    num_train_epochs = 1, 
    learning_rate=1e-4, 
    per_device_train_batch_size=8, 
    per_device_eval_batch_size=8,
    logging_steps=200,
    remove_unused_columns=False,
    logging_dir="./logs",
)


trainer = Trainer(
    model = model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_accuracy,
)

trainer.train()

This creates the ValueError: (in the last call it looks like the predictions have the wrong shape)

ValueError                                Traceback (most recent call last)
<ipython-input-35-2929c2069c3e> in <module>
     36 )
     37 
---> 38 trainer.train()

~/transformers/lib/python3.8/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, **kwargs)
   1109 
   1110             self.control = self.callback_handler.on_epoch_end(self.args, self.state, self.control)
-> 1111             self._maybe_log_save_evaluate(tr_loss, model, trial, epoch)
   1112 
   1113             if self.args.tpu_metrics_debug or self.args.debug:

~/transformers/lib/python3.8/site-packages/transformers/trainer.py in _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch)
   1196         metrics = None
   1197         if self.control.should_evaluate:
-> 1198             metrics = self.evaluate()
   1199             self._report_to_hp_search(trial, epoch, metrics)
   1200 

~/transformers/lib/python3.8/site-packages/transformers/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
   1665         start_time = time.time()
   1666 
-> 1667         output = self.prediction_loop(
   1668             eval_dataloader,
   1669             description="Evaluation",

~/transformers/lib/python3.8/site-packages/transformers/trainer.py in prediction_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
   1838 
   1839         if self.compute_metrics is not None and preds is not None and label_ids is not None:
-> 1840             metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
   1841         else:
   1842             metrics = {}

<ipython-input-34-e9488e1392dd> in compute_accuracy(p)
     15 def compute_accuracy(p: EvalPrediction):
     16 
---> 17     preds = np.argmax(p.predictions, axis=1)
     18     return {"acc": (preds == p.label_ids).mean()}

<__array_function__ internals> in argmax(*args, **kwargs)

~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in argmax(a, axis, out)
   1191 
   1192     """
-> 1193     return _wrapfunc(a, 'argmax', axis=axis, out=out)
   1194 
   1195 

~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
     53     bound = getattr(obj, method, None)
     54     if bound is None:
---> 55         return _wrapit(obj, method, *args, **kwds)
     56 
     57     try:

~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args, **kwds)
     42     except AttributeError:
     43         wrap = None
---> 44     result = getattr(asarray(obj), method)(*args, **kwds)
     45     if wrap:
     46         if not isinstance(result, mu.ndarray):

~/transformers/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order, like)
    100         return _asarray_with_like(a, dtype=dtype, order=order, like=like)
    101 
--> 102     return array(a, dtype, copy=False, order=order)
    103 
    104 

ValueError: could not broadcast input array from shape (3,2) into shape (3,)
1 Like

Hi, @DavidPfl, I am facing a similar issue. Were you able to fix this?

Thanks!

@joeddav or @valhalla - Can you please help us on this? Thanks

Hi, sorry for the late reply.
I was able to fix it with this definition for compute_metrics:

def compute_metrics(p: EvalPrediction):

    metric_acc = datasets.load_metric("accuracy")    
    preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(preds, axis=1)
    result = {}
    result["accuracy"] = metric_acc.compute(predictions=preds, references=p.label_ids)["accuracy"]
    return result

I think the issue is that BartForSequenceClassification returns sequence-to-sequence-classifications instead of a single classification for an input sequence (As Bert does) and I copied the metrics-computation from a script which utilized BertForSequenceClassification.

Thanks a lot. Didnt realise that it was because of compute_metrics. Will give it a try soon. Thanks!