Hi all,
I encountered a ValueError when training the facebook/bart-base
transformer with a sequence classification head.
It seems that the predictions have a different shape than those of, e.g., the bert-base-uncased
model for sequence classification.
I am using transformers version 4.6.1.
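To illustrate what I mean, here is a quick sketch (not part of the reproduction script below) that prints the output structure of both models on a dummy sentence. I believe BART returns extra tensors besides the logits:

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

for name in ["facebook/bart-base", "bert-base-uncased"]:
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForSequenceClassification.from_pretrained(name, num_labels=2)
    inputs = tokenizer("a short test sentence", return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    # outputs is a ModelOutput; print which tensors it contains besides the logits
    print(name, list(outputs.keys()))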
Here is an example script you can copy and paste to reproduce the error. It uses my own toy data, and only a small subset of it, so the script runs in just a few minutes.
Note that when I replace "facebook/bart-base" with "bert-base-uncased" as model_name below, the script executes successfully.
Do you know what I am doing wrong?
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import TrainingArguments, Trainer, EvalPrediction, AutoConfig, AutoTokenizer, AutoModelForSequenceClassification
filepath = "https://raw.githubusercontent.com/DavidPfl/thesis_ds/main/data/archiv/suttner_data.tsv"
df_dataset = pd.read_csv(filepath, sep="\t", header=None)
df_dataset["text"] = df_dataset.iloc[:, 4] + df_dataset.iloc[:, 5]
articles = df_dataset["text"].tolist()
labels = df_dataset.iloc[:, 1].astype(int).tolist()
train_articles, test_articles, train_labels, test_labels = train_test_split(articles, labels, stratify=labels)
class HyperpartisanDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
def compute_accuracy(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
    return {"acc": (preds == p.label_ids).mean()}
model_name = "facebook/bart-base" # change this line to "bert-base-uncased" and the script executes successfully!
config = AutoConfig.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)
train_encodings = tokenizer(train_articles[:3], truncation=True, padding=True)
test_encodings = tokenizer(test_articles[:3], truncation=True, padding=True)
train_dataset = HyperpartisanDataset(train_encodings, train_labels[:3])
eval_dataset = HyperpartisanDataset(test_encodings, test_labels[:3])
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    config=config,
)
training_args = TrainingArguments(
    output_dir="./test",
    do_train=True,
    do_eval=True,
    evaluation_strategy="epoch",
    num_train_epochs=1,
    learning_rate=1e-4,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_steps=200,
    remove_unused_columns=False,
    logging_dir="./logs",
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_accuracy,
)
trainer.train()
This raises the following ValueError (in the last frame it looks like the predictions have the wrong shape):
ValueError Traceback (most recent call last)
<ipython-input-35-2929c2069c3e> in <module>
36 )
37
---> 38 trainer.train()
~/transformers/lib/python3.8/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, **kwargs)
1109
1110 self.control = self.callback_handler.on_epoch_end(self.args, self.state, self.control)
-> 1111 self._maybe_log_save_evaluate(tr_loss, model, trial, epoch)
1112
1113 if self.args.tpu_metrics_debug or self.args.debug:
~/transformers/lib/python3.8/site-packages/transformers/trainer.py in _maybe_log_save_evaluate(self, tr_loss, model, trial, epoch)
1196 metrics = None
1197 if self.control.should_evaluate:
-> 1198 metrics = self.evaluate()
1199 self._report_to_hp_search(trial, epoch, metrics)
1200
~/transformers/lib/python3.8/site-packages/transformers/trainer.py in evaluate(self, eval_dataset, ignore_keys, metric_key_prefix)
1665 start_time = time.time()
1666
-> 1667 output = self.prediction_loop(
1668 eval_dataloader,
1669 description="Evaluation",
~/transformers/lib/python3.8/site-packages/transformers/trainer.py in prediction_loop(self, dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix)
1838
1839 if self.compute_metrics is not None and preds is not None and label_ids is not None:
-> 1840 metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
1841 else:
1842 metrics = {}
<ipython-input-34-e9488e1392dd> in compute_accuracy(p)
15 def compute_accuracy(p: EvalPrediction):
16
---> 17 preds = np.argmax(p.predictions, axis=1)
18 return {"acc": (preds == p.label_ids).mean()}
<__array_function__ internals> in argmax(*args, **kwargs)
~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in argmax(a, axis, out)
1191
1192 """
-> 1193 return _wrapfunc(a, 'argmax', axis=axis, out=out)
1194
1195
~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in _wrapfunc(obj, method, *args, **kwds)
53 bound = getattr(obj, method, None)
54 if bound is None:
---> 55 return _wrapit(obj, method, *args, **kwds)
56
57 try:
~/transformers/lib/python3.8/site-packages/numpy/core/fromnumeric.py in _wrapit(obj, method, *args, **kwds)
42 except AttributeError:
43 wrap = None
---> 44 result = getattr(asarray(obj), method)(*args, **kwds)
45 if wrap:
46 if not isinstance(result, mu.ndarray):
~/transformers/lib/python3.8/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order, like)
100 return _asarray_with_like(a, dtype=dtype, order=order, like=like)
101
--> 102 return array(a, dtype, copy=False, order=order)
103
104
ValueError: could not broadcast input array from shape (3,2) into shape (3,)
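My guess is that for BART, p.predictions is a tuple (the logits plus additional tensors such as encoder states), while for BERT it is just the logits array, so np.argmax fails when numpy tries to convert the tuple into one array. A guard like the following should avoid the crash, but I am not sure it is the right fix:

def compute_accuracy(p: EvalPrediction):
    # For BART, p.predictions seems to be a tuple whose first element holds the
    # logits; for BERT it is the logits array itself. Taking the first element
    # is my guess, not a confirmed fix.
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    preds = np.argmax(logits, axis=1)
    return {"acc": (preds == p.label_ids).mean()}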