Hello Scholars!
I have code that worked at the end of 2022 but no longer works now. I am fine-tuning RoBERTa-base to classify reviews with 3 labels (0 = neutral, 1 = positive, 2 = negative), running on Google Colab. I can fine-tune and train the model without error, but I hit an error when I try to evaluate the model and print the classification report.
Here is my code:
! pip install simpletransformers -q
import os
import numpy as np
import pandas as pd
import sklearn.metrics
from tqdm import tqdm
from scipy import spatial
import spacy
from simpletransformers.classification import ClassificationModel
np.random.seed(619)
roberta = ClassificationModel('roberta', 'roberta-base')  # this instance ends up unused; the configured model is built below
Prepping the data
reviews = pd.read_csv("<dropbox file URL>")  # URL elided; the CSV has 'text' and 'labels' columns
# split into train/test/validate (80/10/10)
reviews.loc[:, 'split'] = np.random.choice([0, 1, 2], len(reviews), p=[.8, .1, .1])
train = reviews.loc[reviews.split == 0, ['text', 'labels']]
test = reviews.loc[reviews.split == 1, ['text', 'labels']]
validate = reviews.loc[reviews.split == 2, ['text', 'labels']]
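A quick sanity check (not part of the pipeline) confirms that all three label codes appear in each split:
# sanity check: each split should contain all three label values (0/1/2)
for name, df in [('train', train), ('test', test), ('validate', validate)]:
    print(name, df['labels'].value_counts().to_dict())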
Training the model
model = ClassificationModel(
    'roberta',
    'roberta-base',
    num_labels=3,
    use_cuda=True,
    args={'reprocess_input_data': True,
          'overwrite_output_dir': True,
          'train_batch_size': 1024,  # note: the simpletransformers key is train_batch_size
          'eval_batch_size': 1024,
          'num_train_epochs': 6,
          'output_dir': 'ModelOutput1',
          'cache_dir': 'ModelOutput1'})
model.train_model(train_df=train, eval_df=test)
Evaluating the model
result, model_outputs, wrong_predictions = model.eval_model(validate)
y_t = list(validate.labels)
y_hat = [np.argmax(a) for a in model_outputs]
print(sklearn.metrics.classification_report(y_true=y_t, y_pred=y_hat))
I receive an error when I run this last segment of code (specifically, on the first line under “Evaluating the model”). The error is as follows:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-6-f2470b9fe547> in <cell line: 1>()
----> 1 result, model_outputs, wrong_predictions = model.eval_model(validate)
2
3 y_t = list(validate.labels)
4 y_hat = [np.argmax(a) for a in model_outputs]
5 print(sklearn.metrics.classification_report(y_true=y_t, y_pred=y_hat))
/usr/local/lib/python3.10/dist-packages/simpletransformers/classification/classification_model.py in eval_model(self, eval_df, multi_label, output_dir, verbose, silent, wandb_log, **kwargs)
1357 self._move_model_to_device()
1358
-> 1359 result, model_outputs, wrong_preds = self.evaluate(
1360 eval_df,
1361 output_dir,
/usr/local/lib/python3.10/dist-packages/simpletransformers/classification/classification_model.py in evaluate(self, eval_df, output_dir, multi_label, prefix, verbose, silent, wandb_log, **kwargs)
1597 preds = np.argmax(preds, axis=1)
1598
-> 1599 result, wrong = self.compute_metrics(
1600 preds, model_outputs, out_label_ids, eval_examples, **kwargs
1601 )
/usr/local/lib/python3.10/dist-packages/simpletransformers/classification/classification_model.py in compute_metrics(self, preds, model_outputs, labels, eval_examples, multi_label, **kwargs)
1927 mcc = matthews_corrcoef(labels, preds)
1928 accuracy = accuracy_score(labels, preds)
-> 1929 f1 = f1_score(labels, preds)
1930 if self.model.num_labels == 2:
1931 tn, fp, fn, tp = confusion_matrix(labels, preds, labels=[0, 1]).ravel()
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in f1_score(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)
1144 array([0.66666667, 1. , 0.66666667])
1145 """
-> 1146 return fbeta_score(
1147 y_true,
1148 y_pred,
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in fbeta_score(y_true, y_pred, beta, labels, pos_label, average, sample_weight, zero_division)
1285 """
1286
-> 1287 _, _, f, _ = precision_recall_fscore_support(
1288 y_true,
1289 y_pred,
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in precision_recall_fscore_support(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)
1571 if beta < 0:
1572 raise ValueError("beta should be >=0 in the F-beta score")
-> 1573 labels = _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
1574
1575 # Calculate tp_sum, pred_sum, true_sum ###
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py in _check_set_wise_labels(y_true, y_pred, average, labels, pos_label)
1389 if y_type == "multiclass":
1390 average_options.remove("samples")
-> 1391 raise ValueError(
1392 "Target is %s but average='binary'. Please "
1393 "choose another average setting, one of %r." % (y_type, average_options)
ValueError: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
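The sklearn part of the error makes sense on its own; it's easy to reproduce with toy labels of my own:
from sklearn.metrics import f1_score

print(f1_score([0, 1, 2], [0, 2, 1], average='micro'))  # fine: 0.333...
f1_score([0, 1, 2], [0, 2, 1])  # default average='binary' raises the same ValueError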
I’ve tried different ways of changing the average default from 'binary' to None or 'micro', but have not been successful.
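For example, here is a representative version of one attempt: passing a multiclass-safe F1 to eval_model as an extra metric (per the simpletransformers docs, extra keyword arguments to eval_model are treated as additional metric functions called as func(true_labels, predictions); the name f1_multiclass is my own):
from sklearn.metrics import f1_score

def f1_multiclass(y_true, y_pred):
    # multiclass-safe F1, supplied to eval_model as an extra metric
    return f1_score(y_true, y_pred, average='micro')

result, model_outputs, wrong_predictions = model.eval_model(
    validate, f1_multiclass=f1_multiclass
)
This still fails with the same ValueError, which matches the traceback: the crash comes from the library's own unconditional f1 = f1_score(labels, preds) call inside compute_metrics, so an extra metric can't get around it. Any help would be appreciated!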