Error: Fine-tuning a GPT-2 model for a question answering task

Hi All,

I have a working script for fine-tuning a T5 model on a question answering task.
I want to adapt the script to fine-tune a GPT-2 model on the same task and the same dataset. My dataset looks like:
'context' = …
'question' = …
'answer' = …
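
For reference, one record in the JSON files looks roughly like this (the values shown here are only illustrative placeholders; the real ones are longer, and the "id" field is dropped later in the script):

record = {
    "id": "...",  # removed before training
    "context": "a passage of text",
    "question": "a question about the passage",
    "answer": "a short answer string",
}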

I am getting an error with the adapted script. Could you please help me resolve it? The error is:

/usr/local/lib/python3.10/dist-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
2208 # remove once script supports set_grad_enabled
2209 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
→ 2210 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
2211
2212

RuntimeError: CUDA error: device-side assert triggered
Compile with TORCH_USE_CUDA_DSA to enable device-side assertions.
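
From the traceback, the assert is raised inside the embedding lookup, so I suspect some token id is larger than the model's embedding matrix. A quick sanity check I was thinking of running (just a sketch, using the variables from the script below) is:

# Sketch: compare the largest token id that will be fed to the model
# with the number of rows in the model's input embedding matrix.
max_id = max(max(ids) for ids in tokenized_train_dataset["input_ids"])
vocab_size = model.get_input_embeddings().num_embeddings
print(max_id, vocab_size)  # max_id >= vocab_size would explain the device-side assert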

The adapted script for fine-tuning GPT-2 is:

import sys
import torch
import pandas as pd
import json
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
from transformers import GPT2LMHeadModel, GPT2Tokenizer, TrainingArguments, Trainer
from transformers import DataCollatorWithPadding
from datasets import Dataset

# global variables

ignore_pad_token_for_loss = True
padding = "max_length"

def preprocess_data(question, context, answer, max_length=512):
    inputs = tokenizer(
        question,
        context,
        max_length=max_length,
        truncation="only_second",
        padding="max_length",
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt"
    )
    input_ids = inputs['input_ids'].squeeze()
    attention_mask = inputs['attention_mask'].squeeze()

    # Encode the answer

    # Tokenize targets with text_target=...
    labels = tokenizer(text_target=answer, max_length=max_length, padding=padding, truncation=True)

    # If we are padding here, replace all tokenizer.pad_token_id in the labels by -100 when we want to ignore
    # padding in the loss.
    if padding == "max_length" and ignore_pad_token_for_loss:
        labels["input_ids"] = [(l if l != tokenizer.pad_token_id else -100) for l in labels["input_ids"]]

    inputs["labels"] = labels["input_ids"]

    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': inputs["labels"]
    }

model = GPT2LMHeadModel.from_pretrained("gpt2")

# Set device to GPU if available

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Load custom training dataset as pandas DataFrame

with open("augmented_data.json", "r") as f:
    data = json.load(f)
data = pd.DataFrame(data)
data = data.drop(columns=["id"])  # remove the id column

# Convert pandas DataFrame to Hugging Face Dataset format

train_dataset = Dataset.from_pandas(data)
print(train_dataset.shape)

# Load custom validation dataset

with open("validation_set.json", "r") as f:
    data = json.load(f)
data = pd.DataFrame(data)
data = data.drop(columns=["id"])  # remove the id column

# Convert pandas DataFrame to Hugging Face Dataset format

val_dataset = Dataset.from_pandas(data)
print(val_dataset.shape)

# Define tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Preprocess dataset

preprocessed_train_dataset = []
preprocessed_val_dataset = []

for example in train_dataset:
    preprocessed_example = preprocess_data(example["question"], example["context"], example["answer"])
    preprocessed_train_dataset.append(preprocessed_example)

for example in val_dataset:
    preprocessed_example = preprocess_data(example["question"], example["context"], example["answer"])
    preprocessed_val_dataset.append(preprocessed_example)

# Convert preprocessed dataset to Hugging Face Dataset format

tokenized_train_dataset = Dataset.from_dict(
    {key: [example[key] for example in preprocessed_train_dataset] for key in preprocessed_train_dataset[0].keys()})
print(tokenized_train_dataset)

tokenized_val_dataset = Dataset.from_dict(
    {key: [example[key] for example in preprocessed_val_dataset] for key in preprocessed_val_dataset[0].keys()})
print(tokenized_val_dataset)

"""Define training arguments"""

training_args = TrainingArguments(
    output_dir="qa_model",
    evaluation_strategy="epoch",
    learning_rate=15e-6,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=100,
    weight_decay=0.01,
    push_to_hub=False,
    save_strategy='epoch',
)

"""Define data collator"""
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

"""Instantiate the Trainer class"""

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    data_collator=data_collator,
)

trainer.train()

trainer.save_model("final_result")
print(" Training is Over ")

Could anyone please help me with this? Thank you.