I want to further fine-tune a Falcon-7B model that has already been fine-tuned with PEFT adapters. Here is the code snippet I am using:
import json
import os
import bitsandbytes as bnb
import pandas as pd
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
MODEL_NAME = "tiiuae/falcon-7b"
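# 4-bit NF4 quantization config with double quantization (QLoRA-style loading)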
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config,
)
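# The tokenizer has no dedicated pad token, so reuse EOS for padding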
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )
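# Enable gradient checkpointing, prepare the 4-bit model for training, and attach LoRA adapters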
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, config)
print_trainable_parameters(model)
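# Load the Q&A dataset; output.json holds records like {"question": ..., "answer": ...}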
data = load_dataset("json", data_files="../localGPT/output.json")
def generate_prompt(data_point):
    return f"""
: {data_point["question"]}
: {data_point["answer"]}
""".strip()
def generate_and_tokenize_prompt(data_point):
    full_prompt = generate_prompt(data_point)
    tokenized_full_prompt = tokenizer(full_prompt, padding=True, truncation=True)
    return tokenized_full_prompt
data = data["train"].shuffle().map(generate_and_tokenize_prompt)
OUTPUT_DIR = "outputs"
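# Training arguments: batch size 1 with 4-step gradient accumulation, 80 max steps, paged 8-bit AdamW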
training_args = transformers.TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
    warmup_ratio=0.05,
    max_steps=80,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=1,
    save_total_limit=3,
    output_dir=OUTPUT_DIR,
    optim="paged_adamw_8bit",
    lr_scheduler_type="cosine",
)
trainer = transformers.Trainer(
    model=model,
    train_dataset=data,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
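# use_cache is incompatible with gradient checkpointing, so disable it for training, then resume from the last checkpoint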
model.config.use_cache = False
trainer.train(resume_from_checkpoint=True)
trainer.save_model(os.path.join(OUTPUT_DIR, "checkpoint-2"))
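# Reload the 4-bit base model and attach the saved LoRA adapter for inference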
PEFT_MODEL = OUTPUT_DIR+"/checkpoint-2"
config = PeftConfig.from_pretrained(PEFT_MODEL)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token
model = PeftModel.from_pretrained(model, PEFT_MODEL)
generation_config = model.generation_config
generation_config.max_new_tokens = 20
generation_config.temperature = 0
generation_config.top_p = 0.7
generation_config.num_return_sequences = 1
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id
DEVICE = "cuda:0"
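# Run a test generation with a question from the first fine-tuning set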
prompt = """
:What is my cat's name?
:
""".strip()
encoding = tokenizer(prompt, return_tensors="pt").to(DEVICE)
with torch.inference_mode():
    outputs = model.generate(
        input_ids=encoding.input_ids,
        attention_mask=encoding.attention_mask,
        generation_config=generation_config,
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
When I load the model again and fine-tune it further with resume_from_checkpoint, it forgets the data it was previously trained on.
Here is a small example.
First fine-tuning run:
[
    {"question": "What is my cat's name?", "answer": "Tom"}
]
Calling generate with "What is my cat's name?" now gives the response "Tom".
I then save this model and load it with resume_from_checkpoint for further fine-tuning on:
[
    {"question": "What is my dog's name?", "answer": "Bob"}
]
After that, asking "What is my cat's name?" gives the response "Bob", or sometimes just repeats the question.
Can someone help me with this?