GPT-2 model training, loss is NaN

I am following the Hugging Face tutorial at https://discuss.huggingface.co/t/fine-tuning-gpt2-for-question-answering/31895. Since I have a large dataset, I used a DataLoader to create batches, but during training the loss becomes NaN. Here is how I create the batches:

import torch
from torch.utils.data import Dataset, DataLoader

class FeedbackEssentials(Dataset):
    def __init__(self, qa_pairs, tokenizer, max_length):
        self.qa_pairs = qa_pairs
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.qa_pairs)

    def __getitem__(self, idx):
        question = self.qa_pairs[idx][0]
        text = f"{question} {self.tokenizer.eos_token}"
        input_ids = self.tokenizer.encode(text, add_special_tokens=True, max_length=self.max_length, padding='max_length', truncation=True)
        attention_mask = [1] * len(input_ids)  # Assuming all tokens should be attended to

        return {
            'input_ids': torch.tensor(input_ids),
            'attention_mask': torch.tensor(attention_mask)
        }

def text_manipulation(train_dataset):
    column1_values = train_dataset['Total Marks'].values
    column2_values = train_dataset['Coding'].values
    listOfLists = [[pair[0], pair[1]] for pair in zip(column1_values, column2_values)]

    text = ""
    for feedback in listOfLists:
        text += f"{feedback[0]} {feedback[1]} {tokenizer.eos_token}"
    return text

training_dataset = text_manipulation(dataset)
max_length_training = max(len(tokenizer.encode(qa_pair[0], add_special_tokens=True)) for qa_pair in training_dataset)
dataset_training = FeedbackEssentials(training_dataset, tokenizer, max_length_training)
batch_size = 4
dataloader = DataLoader(dataset_training, batch_size=batch_size, shuffle=True)
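
For reference, a single batch coming out of the dataloader can be inspected like this (just a sanity check, not part of the training code):

# Quick look at one batch produced by the DataLoader
batch = next(iter(dataloader))
print(batch['input_ids'].shape)       # expected: (batch_size, max_length_training)
print(batch['attention_mask'].shape)  # expected: (batch_size, max_length_training)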

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.9)
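
For completeness, the training loop is roughly the following (a simplified sketch of what I run; `model` is the GPT-2 model loaded earlier, `num_epochs` is just an illustrative value, and I pass input_ids as labels so the model returns the language-modelling loss, which is where the NaN shows up):

# Simplified sketch of the training loop; the printed loss becomes nan
model.train()
num_epochs = 3  # illustrative value
for epoch in range(num_epochs):
    for batch in dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(model.device)
        attention_mask = batch['attention_mask'].to(model.device)
        # labels = input_ids, so the model computes the LM loss directly
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    scheduler.step()
    print(f"Epoch {epoch}: loss = {loss.item()}")  # prints nan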