Hi all, I'm getting a CUDA error when calling trainer.train():

Caught RuntimeError in replica 0 on device 0.
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`

Full traceback:
RuntimeError Traceback (most recent call last)
<ipython-input-14-3435b262f1ae> in <module>
----> 1 trainer.train()
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1314 tr_loss_step = self.training_step(model, inputs)
1315 else:
-> 1316 tr_loss_step = self.training_step(model, inputs)
1317
1318 if (
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/trainer.py in training_step(self, model, inputs)
1847 loss = self.compute_loss(model, inputs)
1848 else:
-> 1849 loss = self.compute_loss(model, inputs)
1850
1851 if self.args.n_gpu > 1:
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
1879 else:
1880 labels = None
-> 1881 outputs = model(**inputs)
1882 # Save past state if it exists
1883 # TODO: this needs to be fixed and made cleaner later.
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
1127 if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks
1128 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1129 return forward_call(*input, **kwargs)
1130 # Do not call functions when jit is used
1131 full_backward_hooks, non_full_backward_hooks = [], []
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/parallel/data_parallel.py in forward(self, *inputs, **kwargs)
166 return self.module(*inputs[0], **kwargs[0])
167 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 168 outputs = self.parallel_apply(replicas, inputs, kwargs)
169 return self.gather(outputs, self.output_device)
170
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/parallel/data_parallel.py in parallel_apply(self, replicas, inputs, kwargs)
176
177 def parallel_apply(self, replicas, inputs, kwargs):
--> 178 return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
179
180 def gather(self, outputs, output_device):
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/parallel/parallel_apply.py in parallel_apply(modules, inputs, kwargs_tup, devices)
84 output = results[i]
85 if isinstance(output, ExceptionWrapper):
---> 86 output.reraise()
87 outputs.append(output)
88 return outputs
/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/_utils.py in reraise(self)
461 # instantiate since we don't know how to
462 raise RuntimeError(msg) from None
--> 463 raise exception
464
465
RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/parallel/parallel_apply.py", line 61, in _worker
output = module(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1147, in _call_impl
result = forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 1530, in forward
outputs = self.bert(
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1147, in _call_impl
result = forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 996, in forward
encoder_outputs = self.encoder(
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1129, in _call_impl
return forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 583, in forward
layer_outputs = layer_module(
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1129, in _call_impl
return forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 470, in forward
self_attention_outputs = self.attention(
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1129, in _call_impl
return forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 400, in forward
self_outputs = self.self(
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1129, in _call_impl
return forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/transformers/models/bert/modeling_bert.py", line 266, in forward
mixed_query_layer = self.query(hidden_states)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/module.py", line 1129, in _call_impl
return forward_call(*input, **kwargs)
File "/mnt/xarfuse/uid-25535/3f6eefe6-seed-nspid4026533657_cgpid9273391-ns-4026533600/torch/nn/modules/linear.py", line 103, in forward
return F.linear(input, self.weight, self.bias)
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
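From what I've read, CUBLAS_STATUS_NOT_INITIALIZED is often a generic symptom rather than the root cause (for example the GPU running out of memory, or a label index that is out of range for the classification head), so the real error can be hidden by asynchronous CUDA execution. A sanity check I'm planning to try is forcing synchronous kernel launches and running a single example on CPU, where index/shape problems usually raise a readable exception. This is only a sketch; `model` and `train_dataset` refer to the code below:

import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"  # must be set before CUDA is initialized; makes stack traces point at the failing op

import torch

# run one training example on CPU, where indexing/shape errors raise clear messages
model.to("cpu")
batch = {k: v.unsqueeze(0) for k, v in train_dataset[0].items()}  # add a batch dimension
outputs = model(**batch)
print(outputs.loss)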
Here is the code I'm running:

import datetime
from datetime import datetime, date

import numpy as np
import pandas as pd
import torch

import bento
from bento import fwdproxy

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from sklearn.metrics import (
    accuracy_score,
    balanced_accuracy_score,
    f1_score,
    recall_score,
    precision_score,
)

from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BertForSequenceClassification,
    BertTokenizerFast,
    RobertaTokenizer,
    Trainer,
    TrainingArguments,
)

model_name = "bert-base-uncased"
# max sequence length for each document/sentence sample
max_length = 512
tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)
def read_data(df, test_size=0.2):
    # clean and shuffle the dataframe (it has a 'comment' text column and an integer 'topic_id' column)
    df.dropna(inplace=True)
    df = df.sample(frac=1)
    df['topic_id'] = df['topic_id'].astype(int)
    df.rename(columns={'topic_id': 'labels'}, inplace=True)
    documents = df.comment.tolist()
    labels = np.array(df.labels.tolist())
    # split into training & validation sets, and also return the full label array
    return train_test_split(documents, labels, test_size=test_size), labels

# call the function on my dataframe
(train_texts, valid_texts, train_labels, valid_labels), target_names = read_data(df)
train_encodings = tokenizer(train_texts, truncation=True, padding=True, max_length=max_length)
valid_encodings = tokenizer(valid_texts, truncation=True, padding=True, max_length=max_length)
class data(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor([self.labels[idx]])
        return item

    def __len__(self):
        return len(self.labels)

# convert our tokenized data into a torch Dataset
train_dataset = data(train_encodings, train_labels)
valid_dataset = data(valid_encodings, valid_labels)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(list(set(df['topic_id'].tolist()))))
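One thing I want to rule out: `num_labels` is the number of distinct `topic_id` values, but if the ids themselves aren't a contiguous 0..num_labels-1 range, some labels will be >= num_labels, and from what I've read an out-of-range label can surface on GPU as an opaque cuBLAS/CUDA error rather than a clear IndexError. Quick check (just a sketch, reusing the variables above):

# verify every label is a valid class index for the classification head
all_labels = np.concatenate([np.asarray(train_labels), np.asarray(valid_labels)])
num_labels = model.config.num_labels
print("label range:", all_labels.min(), "->", all_labels.max(), "| num_labels:", num_labels)
assert all_labels.min() >= 0 and all_labels.max() < num_labels, "labels must be in [0, num_labels)"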
training_args = TrainingArguments(
    output_dir='./results',            # output directory
    num_train_epochs=3,                # total number of training epochs
    per_device_train_batch_size=4,     # batch size per device during training
    per_device_eval_batch_size=24,     # batch size for evaluation
    # warmup_steps=500,                # number of warmup steps for the learning rate scheduler
    # weight_decay=0.01,               # strength of weight decay
    logging_dir='./logs',              # directory for storing logs
    load_best_model_at_end=True,       # load the best model when finished training (default metric is loss);
                                       # set `metric_for_best_model` to use accuracy or another metric instead
    logging_steps=400,                 # log & save weights every `logging_steps` steps
    save_steps=400,
    evaluation_strategy='steps',
)
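Another thing I'm checking, since this cuBLAS error can apparently also be an out-of-memory symptom: with max_length=512 and per_device_train_batch_size=4 the activation memory is not tiny, so before training I print how much free memory each GPU actually has (sketch; `torch.cuda.mem_get_info` needs a reasonably recent torch):

# how much memory is actually free on each visible GPU?
for i in range(torch.cuda.device_count()):
    free, total = torch.cuda.mem_get_info(i)
    print(f"GPU {i}: {free / 1e9:.1f} GB free of {total / 1e9:.1f} GB")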
# metrics
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # calculate metrics using sklearn's functions
    # (note: with more than two classes, f1/recall/precision need an `average`
    #  argument, e.g. average="macro"; the defaults assume binary labels)
    acc = accuracy_score(labels, preds)
    balanced_accuracy = balanced_accuracy_score(labels, preds)
    f1 = f1_score(labels, preds)
    recall = recall_score(labels, preds)
    precision = precision_score(labels, preds)
    return {
        'accuracy': acc,
        'balanced_accuracy': balanced_accuracy,
        'f1_score': f1,
        'recall': recall,
        'precision': precision,
    }
trainer = Trainer(
    model=model,                          # the instantiated Transformers model to be trained
    args=training_args,                   # training arguments, defined above
    train_dataset=train_dataset,          # training dataset
    eval_dataset=valid_dataset,           # evaluation dataset
    compute_metrics=compute_metrics,      # the callback that computes the metrics of interest
)
trainer.train()
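Finally, since the traceback goes through torch.nn.parallel (the Trainer wraps the model in DataParallel when it sees more than one GPU), I may also try restricting the run to a single GPU to rule out the multi-GPU path. Just a sketch; this has to be set before torch initializes CUDA:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # expose only the first GPU to this process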