Retraining a PEFT model

I want to further fine-tune a falcon-7b model that was fine-tuned with PEFT adapters. Here is the code snippet I am using:

import json
import os
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
MODEL_NAME = "tiiuae/falcon-7b"
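# Load the base model in 4-bit NF4 with double quantization and bf16 compute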
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
 
def print_trainable_parameters(model):
    """
   Prints the number of trainable parameters in the model.
   """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
 
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
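# LoRA adapters on Falcon's fused attention projection (query_key_value)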
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
print_trainable_parameters(model)
 
data = load_dataset("json", data_files="../localGPT/output.json")
 
def generate_prompt(data_point):
    return f"""
   : {data_point["question"]}
   : {data_point["answer"]}
   """.strip()
 
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt
 
data = data["train"].shuffle().map(generate_and_tokenize_prompt)
 
OUTPUT_DIR = "outputs"
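# Short training run: effective batch size 4 (1 x 4 gradient accumulation), capped at 80 steps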
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    warmup_ratio=0.05,
    max_steps=80,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_total_limit=3,
    output_dir=OUTPUT_DIR,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
)
 
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
 
model.config.use_cache = False
trainer.train(resume_from_checkpoint=True)
trainer.save_model(os.path.join(OUTPUT_DIR, "checkpoint-2"))
 
PEFT_MODEL = OUTPUT_DIR+"/checkpoint-2"
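# Load the saved adapter config, then the quantized base model with the adapter on top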
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(model, PEFT_MODEL)
generation_config = model.generation_config
generation_config.max_new_tokens = 20
generation_config.temperature = 0
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
 
DEVICE = "cuda:0"
 
prompt = """
:What is my cat's name?
:
""".strip()
 
encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

When I fine-tune further after loading the model again with resume_from_checkpoint, it forgets the previously trained data.
Here is a small example.
First fine-tuning run:
[
{"question":"What is my cats name?","answer":"Tom"}
]
Generating with "What is my cats name?" now gives the response "Tom".
Then I save the model and load it with resume_from_checkpoint for further fine-tuning (see the sketch after this example) on:
[
{"question":"What is my dogs name?","answer":"Bob"}
]
Asking "What is my cats name?" now gives the response "Bob", or it sometimes just repeats the question.
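
For reference, the second round is invoked roughly like this, reusing the objects defined in the script above (output2.json is just a placeholder name for the new Q&A file):

data = load_dataset("json", data_files="../localGPT/output2.json")  # new pairs, e.g. the dog example
data = data["train"].shuffle().map(generate_and_tokenize_prompt)

trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train(resume_from_checkpoint=True)  # resumes from the latest checkpoint in OUTPUT_DIR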

Can someone help me with this?
