RoBERTa fine-tuning, CUBLAS_STATUS_NOT_SUPPORTED

Hello, I am trying to fine-tune a RoBERTa model for topic categorisation on my own dataset.
I am getting the following error when running on a GPU:

```
File /lib/python3.9/site-packages/transformers/models/roberta/modeling_roberta.py", line 235, in forward
    attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_SUPPORTED when calling `cublasSgemmStridedBatched( handle, opa, opb, m, n, k, &alpha, a, lda, stridea, b, ldb, strideb, &beta, c, ldc, stridec, num_batches)`
```
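
From what I understand, CUDA errors like this can be reported asynchronously, so the matmul in the traceback is not necessarily the operation that actually failed. If it helps, I can rerun with synchronous kernel launches to get a more precise stack trace, roughly like this (my own untested sketch; `CUDA_LAUNCH_BLOCKING` is a standard CUDA/PyTorch environment variable, not something from my script):

```python
import os

# Debugging only: force synchronous CUDA kernel launches so the Python
# traceback points at the operation that actually failed. This needs to be
# set before any CUDA work happens (or exported in the shell before
# launching the script), and it slows training down noticeably.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
```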

Since I am not the owner of the machine I am training on, I am limited to the following specs:

Core i5-11500 (6 cores/12 threads) / 64GB RAM / Nvidia A5000 (24GB)
CUDA 11.6

Below is the code I have been using:

```python
import pandas as pd
import datasets
from transformers import RobertaTokenizerFast, RobertaForSequenceClassification, Trainer, TrainingArguments
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tqdm import tqdm
import os

# set up the CUDA device for training if available (the Trainer moves the model onto it)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

os.environ["WANDB_DISABLED"] = "true"

from huggingface_hub import login

login("token")

# get data and add column names
data = pd.read_csv("data_combined_24.csv", delimiter=",", header=None, names=["label", "text"])

# randomly shuffle the data
data = data.sample(frac=1).reset_index(drop=True)

# split into train and test (90/10)
train_data = data[:int(len(data)*0.9)]
test_data = data[int(len(data)*0.9):]

model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=23)
tokenizer = RobertaTokenizerFast.from_pretrained('roberta-base', model_max_length=512)

# tokenize the data adding padding so that all sequences are the same length
t_train_data = tokenizer(train_data.text.tolist(), truncation=True, padding=True)
t_test_data = tokenizer(test_data.text.tolist(), truncation=True, padding=True)
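
# Sanity check added for this post (my own assumption, not part of the original
# run): confirm that no tokenized sequence exceeds RoBERTa's 512-token limit,
# since over-long inputs are one common cause of CUDA-side failures in the
# attention matmul. truncation=True above should already enforce this.
longest_train_seq = max(len(ids) for ids in t_train_data["input_ids"])
print("Longest tokenized training sequence:", longest_train_seq)
assert longest_train_seq <= tokenizer.model_max_length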

# pull the input_ids and attention_mask lists out of the tokenizer output
train_input_ids = t_train_data['input_ids']
train_attention_masks = t_train_data['attention_mask']

test_input_ids = t_test_data['input_ids']
test_attention_masks = t_test_data['attention_mask']

# get all unique topics
topics = data['label'].unique()

# create mappings between topic name and integer id
topic_to_int = {topic: i for i, topic in enumerate(topics)}
int_to_topic = {i: topic for i, topic in enumerate(topics)}

# create np array combining input_ids and attention_mask and label
train_data_np = np.array([train_input_ids, train_attention_masks, list(train_data['label'])], dtype=object).T
test_data_np = np.array([test_input_ids, test_attention_masks, list(test_data['label'])], dtype=object).T

final_train = pd.DataFrame(train_data_np, columns=['input_ids', 'attention_mask', 'label'])

final_test = pd.DataFrame(test_data_np, columns=['input_ids', 'attention_mask', 'label'])

# map label to integer
if type(final_train['label'][0]) == str:
    final_train['label'] = final_train['label'].map(lambda x: topic_to_int[x])

if type(final_test['label'][0]) == str:
    final_test['label'] = final_test['label'].map(lambda x: topic_to_int[x])
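
# Sanity check added for this post (untested sketch): the model above was
# created with num_labels=23, so there should be exactly 23 unique topics and
# every mapped label must fall in [0, 23). An out-of-range label index is a
# common cause of cryptic CUDA errors that only appear once training runs on
# the GPU.
assert len(topics) == 23, f"expected 23 topics, found {len(topics)}"
assert 0 <= final_train['label'].min() and final_train['label'].max() < 23
assert 0 <= final_test['label'].min() and final_test['label'].max() < 23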


dataframe_train = datasets.Dataset.from_pandas(final_train)
dataframe_test = datasets.Dataset.from_pandas(final_test)
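
# Quick look added for this post: print what the Trainer will actually
# receive, to confirm the columns are 'input_ids', 'attention_mask' and 'label'.
print(dataframe_train)
print(dataframe_train[0]["input_ids"][:10], dataframe_train[0]["label"])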


def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc
    }

training_args = TrainingArguments(
    f"roberta-finetuned-topic",
    evaluation_strategy = "epoch",
    save_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    push_to_hub=True,
)

trainer = Trainer(
    model,
    training_args,
    train_dataset=dataframe_train,
    eval_dataset=dataframe_test,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()
```
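
If it helps with debugging, I could also push a single batch through the model on the CPU, where error messages tend to be more descriptive than the asynchronous CUDA ones. A rough sketch of what I have in mind (not run yet, so treat the column names as an assumption about my dataset):

```python
import torch

# Take the first two examples from the tokenized training Dataset; because
# the tokenizer padded everything to a common length, they stack cleanly.
batch = dataframe_train[:2]

inputs = {
    "input_ids": torch.tensor(batch["input_ids"]),
    "attention_mask": torch.tensor(batch["attention_mask"]),
    "labels": torch.tensor(batch["label"]),
}

# Run the forward pass on the CPU and check that a loss comes out.
model_cpu = model.to("cpu")
outputs = model_cpu(**inputs)
print("CPU forward pass loss:", outputs.loss.item())
```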

If any more info is needed, please let me know and I will edit the post.
