Expected input batch_size (2048) to match target batch_size (4)

!pip install -q transformers datasets sklearn

imports

from random import shuffle

import datasets
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from sklearn.metrics import f1_score
from sklearn.preprocessing import LabelEncoder
from transformers import (
    AutoConfig,
    BertForSequenceClassification,
    BertForTokenClassification,
    BertTokenizer,
    Trainer,
    TrainingArguments,
)

# Load a 200-example slice of the Yelp polarity training split.
# Other slice forms accepted by `split`: 'train[:100]', 'train[:1%]'.
dataset = datasets.load_dataset('yelp_polarity', split='train[800:1000]')

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Tokenize each review on its own, padded/truncated to exactly 512 tokens.
# Because of return_tensors="pt", every tensor in an encoding carries a
# leading batch dimension of size 1, i.e. has shape (1, 512).
encoded_dataset = [
    tokenizer(
        item['text'],
        return_tensors="pt",
        padding='max_length',
        truncation=True,
        max_length=512,
    )
    for item in dataset
]

import torch
# Attach each review's class label to its encoding as a 1-element int64
# tensor under the 'labels' key.
for enc_item, item in zip(encoded_dataset, dataset):
    enc_item['labels'] = torch.tensor([item['label']], dtype=torch.long)

# Sanity check: print the tensor shapes of one encoded example.
for key, val in encoded_dataset[3].items():
    print(f'key: {key}, content: {val.size()}')

# Yelp polarity has ONE label per review, so this is sequence classification.
# A token-classification head emits one prediction per token: with a batch of
# 4 reviews x 512 tokens the loss sees 2048 predictions but only 4 labels,
# which raises "Expected input batch_size (2048) to match target batch_size (4)".
# The sequence-classification head emits one prediction per review instead.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=2,  # yelp_polarity is binary: negative / positive
)

# Remove the size-1 dimensions from every tensor of every example, so that
# input tensors go from (1, 512) to (512,) and each label from (1,) to a
# scalar. Batching the examples later re-adds the batch dimension.
for item in encoded_dataset:
    for key in item:
        item[key] = torch.squeeze(item[key])

# First 100 examples are used for training, the rest are held out.
train_set = encoded_dataset[:100]
test_set = encoded_dataset[100:]

# Training configuration: 5 epochs, batches of 4 examples, CPU only.
training_args = TrainingArguments(
    num_train_epochs=5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    output_dir='results',  # checkpoints are written here
    logging_dir='logs',
    no_cuda=True,  # do not use a GPU even if one is available
)

# Only a training set is supplied; no evaluation is run during training.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_set,
)
trainer.train()

I get this error when I train:
Expected input batch_size (2048) to match target batch_size (4)

Can you supply the stack trace of the error?

I am trying other methods now, so I don't have it.

Which method did you use instead?