I am trying to classify sentences into 5 categories but once I try to train my model i am stuck here because I keep getting this error:
ValueError: The model did not return a loss from the inputs, only the following keys: last_hidden_state,pooler_output. For reference, the inputs it received are input_ids,token_type_ids,attention_mask.
I have provided he code below
from transformers import AutoModel,Trainer, TrainingArguments, AutoTokenizer
model = AutoModel.from_pretrained(“allenai/scibert_scivocab_uncased”,num_labels = 5)
tokenizer = AutoTokenizer.from_pretrained(“allenai/scibert_scivocab_uncased”)
Initialize empty lists for tags and text
tags_list =
text_list =
Iterate through the test_data list
for entry in train_data:
# Extract tags and text from the entry dictionary
tags = entry[“tags”]
text = entry[“text”]
# Append tags and text to their respective lists
tags_list.append(tags)
text_list.append(text)
train_inputs = tokenizer(text_list, padding=‘max_length’, truncation=True,max_length=72)
class custom_dataset(torch.utils.data.Dataset):
def init(self, encodings, labels):
self.encodings = encodings
self.labels = labels
def __getitem__(self, idx):
item = {key : torch.tensor(val[idx]) for key, val in self.encodings.items()}
item['labels'] = torch.tensor(self.labels[idx])
return item
def __len__(self):
return len(self.labels)
train_dataset = custom_dataset(train_inputs, tags_list)
training_args = TrainingArguments(output_dir=‘./results’,report_to = ‘none’)
trainer = Trainer(
model=model,
args=training_args,
train_dataset= train_dataset,)
trainer.train()
Can someone please help me figure out what i am doing wrong?