Hi All,
I have participated in the kaggle LLM science competition for that i am using Flan T5 to fine tune my model using lora. I am facing issue when i am trying to train my model, below is my error.
ValueError: Caught ValueError in replica 0 on device 0.
Original Traceback (most recent call last):
File “/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py”, line 64, in _worker
output = module(*input, **kwargs)
File “/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1501, in _call_impl
return forward_call(*args, **kwargs)
File “/opt/conda/lib/python3.10/site-packages/peft/peft_model.py”, line 1080, in forward
return self.base_model(
File “/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1501, in _call_impl
return forward_call(*args, **kwargs)
File “/opt/conda/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py”, line 1709, in forward
encoder_outputs = self.encoder(
File “/opt/conda/lib/python3.10/site-packages/torch/nn/modules/module.py”, line 1501, in _call_impl
return forward_call(*args, **kwargs)
File “/opt/conda/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py”, line 1023, in forward
batch_size, seq_length = input_shape
ValueError: too many values to unpack (expected 2)
Below is my code i am using to train my model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
from string import Template
from pathlib import Pathimport os
import warnings
warnings.simplefilter(“ignore”)from tqdm.notebook import tqdm
for training
from peft import LoraConfig, get_peft_model, TaskType
from transformers import TrainingArguments
from trl import SFTTrainer, DataCollatorForCompletionOnlyLMfor traing set
from datasets import load_dataset,Dataset
from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import numpy as npfrom IPython.display import Markdown, display
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import pandas as pd
import numpy as npllm = “/kaggle/input/googleflan-t5-base/flan-t5-base”
load training data
train_dataset = load_dataset(path=“csv”, data_files=[“/kaggle/input/kaggle-llm-science-exam/train.csv”])
test_dataset = load_dataset(“csv”, data_files=[“/kaggle/input/kaggle-llm-science-exam/test.csv”])original_model=AutoModelForSeq2SeqLM.from_pretrained(llm,torch_dtype=torch.bfloat16)
tokenizer=AutoTokenizer.from_pretrained(llm)lora_config = LoraConfig(
r=32,
lora_alpha=32,
target_modules=[“q”, “v”],
lora_dropout=0.05,
bias=“none”,
task_type=TaskType.SEQ_2_SEQ_LM
)def format_text(example):
“”" fill inputs in promt for a sample “”"template="""Answer the following multiple choice question by giving the most appropriate response. Answer should be one among [A,B,C,D,E] Question: {prompt}\n A) {a}\n B) {b}\n C) {c}\n D) {d}\n E) {e}\n ### Answer: """ prompt = PromptTemplate(template=template,input_variables=['prompt', 'a', 'b', 'c', 'd', 'e']) text = prompt.format(prompt=example['prompt'], a=example['A'], b=example['B'], c=example['C'], d=example['D'], e=example['E']) example['input_ids'] = tokenizer(text, padding="max_length", truncation=True, return_tensors="pt").input_ids example['labels'] = tokenizer(example['answer'], padding='longest', truncation=True,return_tensors="pt").input_ids return example
train_dataset = train_dataset.map(format_text)
tokenized_dataset = train_dataset.remove_columns([‘id’, ‘prompt’, ‘A’, ‘B’,‘C’,‘D’,‘E’,‘answer’])
peft_model = get_peft_model(original_model,
lora_config)
print(print_number_of_trainable_model_parameters(peft_model))output_dir = f’/kaggle/working/peft-dialogue-training-{str(int(time.time()))}’
os.environ[“WANDB_DISABLED”] = “true”
peft_training_args = TrainingArguments(
output_dir=output_dir,
auto_find_batch_size=True,
learning_rate=1e-3, # Higher learning rate than full fine-tuning.
num_train_epochs=1,
logging_steps=1,
max_steps=1
)peft_trainer = Trainer(
model=peft_model,
args=peft_training_args,
train_dataset=tokenized_dataset[“train”]
)peft_trainer.train()
Kaggle link can be found here Kaggle - LLM Science Exam
Any help is appreciated.