Retraining a PEFT model

I want to further fine-tune a falcon-7b model that was fine-tuned with PEFT adapters. Here is the code snippet I am using:

import json
import os
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
 
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
 
MODEL_NAME = "tiiuae/falcon-7b"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
 
def print_trainable_parameters(model):
    """
   Prints the number of trainable parameters in the model.
   """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
 
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, config)
print_trainable_parameters(model)
 
data = load_dataset("json", data_files="../localGPT/output.json")
 
def generate_prompt(data_point):
    return f"""
   : {data_point["question"]}
   : {data_point["answer"]}
   """.strip()
 
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt
 
data = data["train"].shuffle().map(generate_and_tokenize_prompt)
 
OUTPUT_DIR = "outputs"
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    warmup_ratio=0.05,
    max_steps=80,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_total_limit=3,
    output_dir=OUTPUT_DIR,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
)
 
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
 
model.config.use_cache = False
trainer.train(resume_from_checkpoint=True)
trainer.save_model(os.path.join(OUTPUT_DIR, "checkpoint-2"))
 
PEFT_MODEL = OUTPUT_DIR+"/checkpoint-2"
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(model, PEFT_MODEL)
generation_config = model.generation_config
generation_config.max_new_tokens = 20
generation_config.temperature = 0
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
 
DEVICE = "cuda:0"
 
prompt = """
:What is my cat's name?
:
""".strip()
 
encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

When I further fine-tune the model after loading it again with resume_from_checkpoint, it forgets the previously trained data.
Here is a small example.
First round of fine-tuning:
[
{"question": "What is my cats name?", "answer": "Tom"}
]
Now calling generate with "What is my cats name?" gives the response "Tom".
I then save this model and load it with resume_from_checkpoint for further fine-tuning with:
[
{"question": "What is my dogs name?", "answer": "Bob"}
]
Asking "What is my cats name?" now gives the response "Bob", or sometimes just repeats the question.

Can someone help me with this?


Having a similar issue. Did you ever figure out how to further train a PEFT model?

I have a similar issue with Mistral.
I can save and load the PEFT model when training it from scratch, but when I resume training and save the model, the saved model performs very poorly, which is very different from the results I saw when evaluating it during training.

Update: I found a solution.

You have to merge the PEFT adapter into the base model and save the merged model (the file size will be similar to that of the base model).
When you want to resume, load this merged model instead of the base model + the adapter.

Here is what my code looks like:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# path to base model (in case you start fine-tuning)
# e.g. "mistralai/Mistral-7B-Instruct-v0.1"
# or path to the fine-tuned model in case you want to resume the fine-tuning
model_path = "path/to/model"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    local_files_only=True,
    quantization_config=bnb_config,
    device_map={"": 0},
    return_dict=True,
    low_cpu_mem_usage=True
)

Set up the model for fine-tuning:

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

import bitsandbytes as bnb

def find_all_linear_names(model):
    # Collect the names of all 4-bit linear layers to use as LoRA target modules
    cls = bnb.nn.Linear4bit  # if args.bits == 4 else (bnb.nn.Linear8bitLt if args.bits == 8 else torch.nn.Linear)
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16-bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)


from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    .....
)
trainer.train()

To save the model, merge the adapter weights into the initial model:

new_model_path = "path/to/model/dir"
trainer.model.save_pretrained(new_model_path)

# model path we defined before
# *Important* it might be the base model or the fine-tuned model
# depending on what model you start the fine-tuning with
base_model = AutoModelForCausalLM.from_pretrained(model_path,
                                                  cache_dir=directory_path,  # your local cache directory
                                                  local_files_only=True,
                                                  #quantization_config=bnb_config,
                                                  device_map={"": 0},
                                                  return_dict=True,
                                                  low_cpu_mem_usage=True,
                                                  torch_dtype=torch.float16,
                                                  )
merged_model = PeftModel.from_pretrained(base_model, new_model_path)
merged_model = merged_model.merge_and_unload()

merged_model.save_pretrained(new_model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)  # tokenizer for the model you fine-tuned
tokenizer.pad_token = tokenizer.eos_token
tokenizer.save_pretrained(new_model_path)

Now when you want to resume training, set model_path to this new_model_path (where you saved your model).
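
For reference, here is a minimal sketch of what the resumed run could look like, reusing the bnb_config and lora_config from the snippets above (the path is a placeholder for wherever you saved the merged model):

# Resume fine-tuning: load the merged model instead of the original base model,
# then attach a fresh LoRA adapter exactly as in the first run.
model_path = "path/to/model/dir"  # the new_model_path saved above

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    local_files_only=True,
    quantization_config=bnb_config,
    device_map={"": 0},
    return_dict=True,
    low_cpu_mem_usage=True,
)
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, lora_config)  # new adapter on top of the merged weights

trainer = SFTTrainer(
    model=model,
    # ... same dataset and training arguments as before
)
trainer.train()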


Thanks a lot @Polar23 for this response. Dumb question, but let's say we do all the training in one session, using PEFT, and save our model. Now we want to use this model: is there any benefit to merging the weights, saving, and using the newly merged model, or is it better to use the model without merging?
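
For context, a rough sketch of the two options being compared, using the checkpoint path from the original post and a placeholder path for the merged model (this is an illustration, not code from the thread):

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Option 1: load the base model and attach the saved LoRA adapter at inference time.
base = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b", device_map="auto", trust_remote_code=True
)
model = PeftModel.from_pretrained(base, "outputs/checkpoint-2")

# Option 2: merge the adapter into the base weights once and save the result,
# so it can later be loaded like any ordinary model, without peft at inference time.
merged = model.merge_and_unload()
merged.save_pretrained("falcon-7b-merged")  # placeholder directory name

# later / elsewhere:
model = AutoModelForCausalLM.from_pretrained(
    "falcon-7b-merged", device_map="auto", trust_remote_code=True
)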