I fine-tuned google/flan-t5-base with PEFT (LoRA). Here is my code:
# loading dataset
from datasets import load_dataset

huggingface_dataset_name = "knkarthick/dialogsum"
dataset = load_dataset(huggingface_dataset_name)
dataset
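Just as a sanity check (not part of the training run), the splits and columns can be inspected like this; the `dialogue` and `summary` columns are what the tokenization step below relies on:

# optional: inspect the splits and one training record
print(dataset)                     # train / validation / test splits
sample = dataset["train"][0]
print(sample.keys())               # expected columns: id, dialogue, summary, topic
print(sample["summary"])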
# loading original model
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = 'google/flan-t5-base'
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, torch_dtype=torch.bfloat16).to('cuda')
tokenizer = AutoTokenizer.from_pretrained(model_name)
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    return example
# The dataset contains 3 different splits: train, validation, and test.
# tokenize_function handles the data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])
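After the mapping, each split should only carry `input_ids` and `labels`. A quick optional check (the padded length is simply the tokenizer's `model_max_length`, since I pass padding="max_length" without an explicit max_length):

# optional: confirm the remaining columns and the padded sequence length
print(tokenized_datasets)
print(len(tokenized_datasets["train"][0]["input_ids"]))   # equals tokenizer.model_max_length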
# peft config
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32,                            # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM  # FLAN-T5
)

peft_model = get_peft_model(original_model, lora_config).to('cuda')
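Before training I also print the trainable parameter count to confirm that only the LoRA adapters on the q/v projections are trainable (`print_trainable_parameters` is provided by the PEFT model wrapper):

# confirm that only the LoRA adapter weights are trainable, not the base model
peft_model.print_trainable_parameters()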
# train config
import time
from transformers import TrainingArguments, Trainer

output_dir = f'/kaggle/working/peft-dialogue-summary-lora-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=1e-3,   # Higher learning rate than full fine-tuning.
    num_train_epochs=1,
    save_strategy="epoch",
    logging_steps=15,
)
peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)
# train (this is where the error happened)
peft_trainer.train()

Training ran all the way to the last steps, but right at the end the following runtime error was raised:
RuntimeError: Some tensors share memory, this will lead to duplicate memory on disk and potential differences when loading them again: [{'base_model.model.decoder.embed_tokens.weight', 'base_model.model.shared.weight', 'base_model.model.encoder.embed_tokens.weight'}]. A potential way to correctly save your model is to use save_model. More information at Torch shared tensors.
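For what it's worth, my reading of the message is that the end-of-epoch checkpoint save is tripping over FLAN-T5's tied embedding weights (shared / encoder.embed_tokens / decoder.embed_tokens) when writing safetensors. The two workarounds I can think of are sketched below; both are assumptions on my part rather than a confirmed fix. `save_model` is the Trainer method the error message names, and `save_safetensors=False` is a TrainingArguments flag that falls back to torch serialization:

# assumption 1: save the adapter explicitly through the Trainer API
peft_trainer.save_model(output_dir)

# assumption 2: disable safetensors for checkpoints so the tied tensors are not rejected
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,
    learning_rate=1e-3,
    num_train_epochs=1,
    save_strategy="epoch",
    logging_steps=15,
    save_safetensors=False,   # keep checkpointing on torch.save instead of safetensors
)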