CUDA out of memory while using the Trainer API

Hi

I am trying to test the Trainer API of Hugging Face with the small code snippet below on a small toy dataset. Unfortunately I am getting a CUDA out-of-memory error, even though I have 32 GB of GPU memory, which should be more than enough for data this small. Any help would be greatly appreciated.
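For reference, how much of the card the training process actually sees can be checked with plain PyTorch calls just before training starts (a quick diagnostic sketch; another process already holding memory on the same GPU is a common cause of this error):

import torch

# Total memory on the first GPU vs. what this process has grabbed so far
props = torch.cuda.get_device_properties(0)
print(f"total:     {props.total_memory / 1e9:.1f} GB")
print(f"allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")
print(f"reserved:  {torch.cuda.memory_reserved(0) / 1e9:.2f} GB")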

from datasets import load_dataset, load_metric
import datasets as dtset
import transformers
import torch
from transformers import Trainer, TrainingArguments, set_seed
from transformers import BertTokenizer, BertForSequenceClassification, BertConfig

# Model and tokenizer for 6-way sequence classification
config = BertConfig.from_pretrained("bert-base-uncased", num_labels=6)
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", config=config)

data_files = {'train': 'F1000_train.csv', 'validation': 'F1000_valid.csv', 'test': 'F1000_test.csv'}
datasets = load_dataset("csv", data_files=data_files)

# Sanity check: this should match the num_labels passed to the config above
num_labels = len(datasets['train'].unique('label'))


def preprocess(examples):
    # Tokenize to a fixed length of 32 tokens
    result = tokenizer(examples['sentence1'], padding='max_length', max_length=32, truncation=True)
    result['label'] = examples['label']
    return result

# Tokenize every split so the Trainer receives input_ids / attention_mask
# instead of raw text columns
datasets = datasets.map(preprocess, batched=True)

metric = load_metric('f1')
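One fix needed before wiring up the Trainer: compute_metrics must be a callable that takes an EvalPrediction and returns a dict of scores, not the Metric object itself. A minimal wrapper could look like this (macro averaging is an assumption; the f1 metric requires an average argument when there are more than two classes):

import numpy as np

def compute_metrics(eval_pred):
    # eval_pred unpacks into (predictions, label_ids)
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    # 'macro' is one reasonable choice for a 6-class problem
    return metric.compute(predictions=preds, references=labels, average='macro')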

training_args = TrainingArguments(output_dir="Out", prediction_loss_only=True,
                                  gradient_accumulation_steps=1, learning_rate=2e-5,
                                  weight_decay=1e-4, local_rank=-1, fp16=True)
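Note that per_device_train_batch_size is not set above, so the Trainer falls back to its default of 8. If memory really is the bottleneck, shrinking the per-device batch and compensating with accumulation is the usual first step, e.g. (a sketch showing only the memory-relevant arguments):

training_args = TrainingArguments(
    output_dir="Out",
    per_device_train_batch_size=2,   # explicit, smaller than the default of 8
    gradient_accumulation_steps=4,   # keeps the effective batch size at 8
    fp16=True,
    # ... remaining arguments as above
)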

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=datasets["train"],
    eval_dataset=datasets["validation"],
    compute_metrics=compute_metrics,  # the wrapper defined above, not the Metric object
)

train_result = trainer.train(resume_from_checkpoint=None)
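One more thing worth noting: with prediction_loss_only=True the evaluation loop computes only the loss and skips compute_metrics entirely, so that flag would have to be dropped for the F1 score to show up, e.g.:

eval_metrics = trainer.evaluate()   # runs on eval_dataset and applies compute_metrics
print(eval_metrics)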

Could this be related to this issue?