I'm trying to fine-tune the xlm-roberta-base model for a binary sentiment classification problem on review data. I've implemented the code as follows:
First, I split the data into training and validation sets:
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(
    sample['text'],
    sample['sentiment'],
    test_size=0.2,
    stratify=sample['sentiment']
)
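To double-check that the stratified split preserved the class balance, this is a quick check I can run (sample is a pandas DataFrame here, so the label columns come back as Series):

# Confirm both splits keep roughly the same positive/negative ratio
print(train_labels.value_counts(normalize=True))
print(val_labels.value_counts(normalize=True))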
Then I prepared the datasets for training:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('xlm-roberta-base')
train_encodings = tokenizer(text=list(train_texts), max_length=200, truncation=True, padding=True, return_tensors='pt')
val_encodings = tokenizer(text=list(val_texts), max_length=200, truncation=True, padding=True, return_tensors='pt')
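For context, the tokenizer output is a BatchEncoding holding padded tensors (because of return_tensors='pt'); this is a quick way to see what it contains, as a sketch since the exact shapes depend on my data:

# Inspect the tokenizer output: keys and the padded input shape
print(train_encodings.keys())              # typically input_ids and attention_mask
print(train_encodings['input_ids'].shape)  # (num_train_examples, padded_seq_len)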
import torch

class Dataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)
train_dataset = Dataset(train_encodings, list(train_labels))
val_dataset = Dataset(val_encodings, list(val_labels))
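In case it helps with debugging, here is a minimal check I can run on a single item to see the shapes and dtypes the Trainer will receive (the item name is just for illustration):

# Look at one training example; every field should come back as a tensor
item = train_dataset[0]
for key, value in item.items():
    print(key, value.shape, value.dtype)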
Then I set up the training arguments:
from transformers import Trainer, TrainingArguments
training_args = TrainingArguments(
    output_dir='/content/drive/MyDrive/Workshop/sentiment_analysis/model',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='/content/drive/MyDrive/Workshop/sentiment_analysis/logs',
    logging_steps=100,
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("xlm-roberta-base", num_labels=2).to(device)
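To make sure the classification head really has two outputs, I can inspect the loaded model (assuming the standard RoBERTa-style head with an out_proj layer):

# The head should map the pooled hidden state to 2 logits
print(model.config.num_labels)    # should be 2
print(model.classifier.out_proj)  # final Linear layer producing the logits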
And finally, I used the Trainer API for training:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

trainer.train()
Then I got the following error message:
***** Running training *****
Num examples = 800
Num Epochs = 3
Instantaneous batch size per device = 8
Total train batch size (w. parallel, distributed & accumulation) = 8
Gradient Accumulation steps = 1
Total optimization steps = 300
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:9: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).
if __name__ == '__main__':
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-19-d5efe8e9a1d1> in <module>()
6 )
7
----> 8 trainer.train()
7 frames
/usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
2956
2957 if not (target.size() == input.size()):
-> 2958 raise ValueError("Target size ({}) must be the same as input size ({})".format(target.size(), input.size()))
2959
2960 return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
ValueError: Target size (torch.Size([8])) must be the same as input size (torch.Size([8, 2]))
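Since the traceback ends in binary_cross_entropy_with_logits rather than a cross-entropy loss, I suspect the dtype of my labels might be relevant; this is the check I would run next (sample['sentiment'] is my raw label column):

# Check which dtype the labels end up with; the model's forward pass
# picks its loss function based on num_labels and the label dtype
print(sample['sentiment'].dtype)
print(train_dataset[0]['labels'].dtype)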
Where did I make a mistake?