Retraining a PEFT model

I have a similar issue with Mistral.
I can save and load the PEFT model when training it from scratch, but when I resume training and save the model again, the saved model performs much worse than it did when I evaluated it during training.

Update: I found a solution.

You have to merge the PEFT adapter into the base model and save the merged model (the resulting file will be about the same size as the base model).
When you want to resume, load this merged model instead of the base model + the adapter.

Here is what my code looks like:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

# path to base model (in case you start fine-tuning)
# e.g. "mistralai/Mistral-7B-Instruct-v0.1"
# or path to the fine-tuned model in case you want to resume the fine-tuning
model_path = "path/to/model"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    local_files_only=True,
    quantization_config=bnb_config,
    device_map={"": 0},
    return_dict=True,
    low_cpu_mem_usage=True
)
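As a quick sanity check (not part of the original recipe), you can print the model's memory footprint after loading; a 7B model in 4-bit NF4 should come out to roughly 4-5 GB:

# optional sanity check: a 7B model loaded in 4-bit NF4 should
# report roughly 4-5 GB here (the exact number varies by setup)
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")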

Set up the model for fine-tuning:

from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

import bitsandbytes as bnb

def find_all_linear_names(model):
    # collect the names of all 4-bit linear layers so LoRA can target them
    cls = bnb.nn.Linear4bit  # use bnb.nn.Linear8bitLt for 8-bit, torch.nn.Linear for full precision
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16-bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)
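If you are curious what this finds, print it; for Mistral-7B it should pick up the attention and MLP projections:

# for Mistral-7B this is typically:
# ['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj']
print(modules)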

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=modules,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
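At this point it's worth confirming that only the adapter weights are trainable:

# should report only a small fraction of parameters as trainable
# (on the order of 0.1-1% for r=8 on a 7B model)
model.print_trainable_parameters()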


from trl import SFTTrainer

trainer = SFTTrainer(
    model=model,
    .....
)
trainer.train()

To save the model, merge the adapter weights into the initial model:

new_model_path = "path/to/model/dir"
trainer.model.save_pretrained(new_model_path)

# model path we defined before
# *Important* it might be the base model or the fine-tuned model
# depending on what model you start the fine-tuning with
base_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    local_files_only=True,
    # cache_dir=directory_path,       # optional: your local HF cache directory
    # quantization_config=bnb_config, # do NOT quantize here; merge in half precision
    device_map={"": 0},
    return_dict=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
)
# load the trained adapter on top of the base model, then fold it into the weights
merged_model = PeftModel.from_pretrained(base_model, new_model_path)
merged_model = merged_model.merge_and_unload()

# note: this writes the merged weights into the same directory as the adapter
merged_model.save_pretrained(new_model_path)
# the tokenizer also needs to be saved next to the merged model
tokenizer = AutoTokenizer.from_pretrained(model_path)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.save_pretrained(new_model_path)
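
Before starting the next round of fine-tuning, I'd suggest loading the merged checkpoint back and generating once, to confirm it behaves like it did during evaluation (a minimal sketch; the prompt and generation settings are just placeholders):

check_model = AutoModelForCausalLM.from_pretrained(
    new_model_path,
    device_map={"": 0},
    torch_dtype=torch.float16,
)
check_tokenizer = AutoTokenizer.from_pretrained(new_model_path)
inputs = check_tokenizer("Tell me a joke.", return_tensors="pt").to(check_model.device)
outputs = check_model.generate(**inputs, max_new_tokens=50)
print(check_tokenizer.decode(outputs[0], skip_special_tokens=True))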

Now, when you want to resume training, set
model_path to this new_model_path (the directory where you saved the merged model).
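
In code, resuming just means pointing the loading step at the merged checkpoint:

# second round of fine-tuning starts from the merged checkpoint
model_path = new_model_path  # then rerun the quantized load + LoRA setup above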
