Flan-T5 fine-tuning error

Hi All,

I am participating in the Kaggle LLM Science Exam competition, and for it I am fine-tuning Flan-T5 with LoRA. I am running into an issue when I try to train the model; below is the error.

ValueError: Caught ValueError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/peft/peft_model.py", line 1080, in forward
    return self.base_model(
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1709, in forward
    encoder_outputs = self.encoder(
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/opt/conda/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1023, in forward
    batch_size, seq_length = input_shape
ValueError: too many values to unpack (expected 2)
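
For context, the failing line in modeling_t5.py unpacks the input shape into exactly two values, so the encoder must be receiving input_ids with more than two dimensions. A minimal illustration of the unpack failure (hypothetical shapes, just to show the mechanism):

import torch

# T5's encoder expects input_ids of shape (batch_size, seq_length).
# A 3-D tensor, e.g. (batch, 1, seq_len), makes the two-way unpack fail.
ids = torch.ones(2, 1, 512, dtype=torch.long)
try:
    batch_size, seq_length = ids.shape
except ValueError as e:
    print(e)  # too many values to unpack (expected 2)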

Below is the code I am using to train the model.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
from string import Template
from pathlib import Path

import os

import warnings
warnings.simplefilter("ignore")

from tqdm.notebook import tqdm

# for training

from peft import LoraConfig, get_peft_model, TaskType
from transformers import TrainingArguments
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

# for the training set

from datasets import load_dataset,Dataset
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import numpy as np

from IPython.display import Markdown, display
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import pandas as pd
import numpy as np

llm = "/kaggle/input/googleflan-t5-base/flan-t5-base"

# load training data

train_dataset = load_dataset(path="csv", data_files=["/kaggle/input/kaggle-llm-science-exam/train.csv"])
test_dataset = load_dataset("csv", data_files=["/kaggle/input/kaggle-llm-science-exam/test.csv"])
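
For reference, load_dataset with csv files returns a DatasetDict with a single "train" split; a quick sanity check on the columns (these match the competition CSVs):

# the competition CSV gives one split with the question, options and answer
print(train_dataset)  # DatasetDict({'train': Dataset(...)})
print(train_dataset["train"].column_names)  # ['id', 'prompt', 'A', 'B', 'C', 'D', 'E', 'answer']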

original_model = AutoModelForSeq2SeqLM.from_pretrained(llm, torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(llm)

lora_config = LoraConfig(
    r=32,
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)
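
As a sanity check on target_modules (in case the names matter here): in flan-t5 the query/value projections are Linear layers literally named q and v, which is what LoRA should wrap:

# confirm that modules named "q" and "v" exist in the base model
qv_modules = [name for name, _ in original_model.named_modules()
              if name.endswith(".q") or name.endswith(".v")]
print(qv_modules[:4])  # e.g. encoder.block.0.layer.0.SelfAttention.q, ...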

def format_text(example):
    """ fill inputs into the prompt template for one sample """

    template = """Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A,B,C,D,E]
Question: {prompt}\n
A) {a}\n
B) {b}\n
C) {c}\n
D) {d}\n
E) {e}\n

### Answer:
"""

    prompt = PromptTemplate(template=template, input_variables=['prompt', 'a', 'b', 'c', 'd', 'e'])
    text = prompt.format(prompt=example['prompt'],
                         a=example['A'],
                         b=example['B'],
                         c=example['C'],
                         d=example['D'],
                         e=example['E'])

    example['input_ids'] = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example['answer'], padding='longest', truncation=True, return_tensors="pt").input_ids

    return example

train_dataset = train_dataset.map(format_text)
tokenized_dataset = train_dataset.remove_columns(['id', 'prompt', 'A', 'B', 'C', 'D', 'E', 'answer'])
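
One thing I have started to suspect: calling the tokenizer with return_tensors="pt" inside .map() stores each example's input_ids with an extra leading dimension (shape (1, seq_len)), so after collation the encoder could be seeing a 3-D batch, which would explain the unpack error above. A variant of the last two assignments in format_text that I have been trying (not confirmed to be the fix):

# drop-in replacement for the two tokenizer calls in format_text:
# without return_tensors="pt" the tokenizer returns flat lists, so each
# example stores 1-D input_ids and the collated batch stays (batch, seq_len)
example['input_ids'] = tokenizer(text, padding="max_length", truncation=True).input_ids
example['labels'] = tokenizer(example['answer'], truncation=True).input_ids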
peft_model = get_peft_model(original_model, lora_config)
print(print_number_of_trainable_model_parameters(peft_model))
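
(print_number_of_trainable_model_parameters is a helper I left out of the snippet; it is just a parameter-counting function along these lines:)

def print_number_of_trainable_model_parameters(model):
    # count parameters that still require gradients after wrapping with LoRA
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    return (f"trainable model parameters: {trainable}\n"
            f"all model parameters: {total}\n"
            f"percentage of trainable model parameters: {100 * trainable / total:.2f}%")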

output_dir = f'/kaggle/working/peft-dialogue-training-{str(int(time.time()))}'
os.environ["WANDB_DISABLED"] = "true"
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-3,  # higher learning rate than full fine-tuning
    num_train_epochs=1,
    logging_steps=1,
    max_steps=1
)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_dataset["train"]
)
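
One more thing I am unsure about: I am not passing a data_collator, so Trainer falls back to default collation, and my labels were padded with padding='longest' per single example, so they may have different lengths across examples. If that matters, I was considering a seq2seq collator along these lines (a sketch using transformers' DataCollatorForSeq2Seq):

from transformers import DataCollatorForSeq2Seq

# pads input_ids/labels per batch and uses -100 for label padding so it is
# ignored by the loss
data_collator = DataCollatorForSeq2Seq(tokenizer, model=peft_model, label_pad_token_id=-100)

peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_dataset["train"],
    data_collator=data_collator
)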

peft_trainer.train()

The competition page can be found here: Kaggle - LLM Science Exam.

Any help is appreciated.