I am finetuning Llama for binary sequence classification with PEFT & Lora using the Trainer class. Loss seems to decrease nicely and accuracy on the validation data reaches ~90% in the final epoch: { "epoch": 4.98, "eval_accuracy": 0.9346576058546785, "eval_loss": 0.18449442088603973, "eval_runtime": 496.2064, "eval_samples_per_second": 7.711, "eval_steps_per_second": 0.965, "step": 595 }
After the training is over I save the model as follows:`
model.state_dict = (
lambda self, *_, **__: get_peft_model_state_dict(self, old_state_dict())
).__get__(model, type(model))
model = torch.compile(model)
trainer.train()
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
torch.save(model.state_dict(), "torch_openllama_saver")
I subsequently try to reload the model and reproduce the evaluation result on the same validation set. However, I get an accuracy of 55% this time. I load the model as follows:
CUTOFF_LEN = 512
if torch.cuda.is_available():
device = "cuda"
else:
device = "cpu"
try:
if torch.backends.mps.is_available():
device = "mps"
except: # noqa: E722
pass
import sys
import textwrap
import torch
from peft import PeftModel, PeftConfig
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from transformers.generation.utils import GreedySearchDecoderOnlyOutput
if torch.cuda.is_available():
device = "cuda"
else:
device = "cpu"
try:
if torch.backends.mps.is_available():
device = "mps"
except: # noqa: E722
pass
load_8bit = True
# Load peft config for pre-trained checkpoint etc.
peft_model_id = "results/experiments_openllama"
config = PeftConfig.from_pretrained(peft_model_id)
config.inference_mode = True
base_model = config.base_model_name_or_path
lora_weights = peft_model_id
if device == "cuda":
model = LlamaForSequenceClassification.from_pretrained(
base_model,
load_in_8bit=load_8bit,
torch_dtype=torch.float16,
device_map="auto",
)
model = PeftModel.from_pretrained(
model, lora_weights, torch_dtype=torch.float16, config=config
)
elif device == "mps":
model = LlamaForSequenceClassification.from_pretrained(
base_model, device_map={"": device}, torch_dtype=torch.float16,
)
model = PeftModel.from_pretrained(
model,
lora_weights,
device_map={"": device},
torch_dtype=torch.float16,
config=config,
) # must set inference_mode=True
else:
model = LlamaForSequenceClassification.from_pretrained(
base_model, device_map={"": device}, low_cpu_mem_usage=True
)
model = PeftModel.from_pretrained(
model, lora_weights, device_map={"": device}, config=config
)
model.load_state_dict(
torch.load(os.path.join(lora_weights, "adapter_model.bin")), strict=False
)
tokenizer = LlamaTokenizer.from_pretrained(peft_model_id)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.bos_token_id = tokenizer.bos_token_id
model.config.eos_token_id = tokenizer.eos_token_id
model.config.sep_token_id = tokenizer.sep_token_id
model.config.unk_token_id = tokenizer.pad_token_id
Why is that & what am I doing wrong? Is the original evaluation accuracy incorrect? Or is there an issue with loading the trained model weights?