Problem saving QLoRA fine-tuned model

I’m having an issue saving the safetensors for my fine-tuned model and pushing them to the Hub. It’s related to, but distinct from, this post.

Whenever I call the save_pretrained() method I run into the same problem as that previous poster. I can push my model to the Hub, but the safetensors containing my fine-tuned weights are not uploaded, so when I download the model later for inference I get the error “Error while deserializing header: InvalidHeaderDeserialization”.
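
For reference, this is roughly how I try to load the model back for inference (the repo id below is a placeholder, not my actual repo); the error is raised inside from_pretrained while the safetensors file is being read:

import torch
from peft import AutoPeftModelForCausalLM

# "my-username/kollama2-7b-qlora" is a placeholder repo id
model = AutoPeftModelForCausalLM.from_pretrained(
    "my-username/kollama2-7b-qlora",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
# fails with: Error while deserializing header: InvalidHeaderDeserialization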

I understand this may be due to an incompatibility between PEFT and PyTorch. Can anyone suggest a workaround?

I should add that the safetensors containing the model weights do exist locally after fine-tuning; they just won’t save correctly or upload to the Hugging Face Hub.
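
The upload step itself (not included in the script below) is roughly the following, run after training, with a placeholder repo id:

from huggingface_hub import login

login(token="hf_...")  # token redacted
# model and tokenizer come from the training script below
model.push_to_hub("my-username/kollama2-7b-qlora")
tokenizer.push_to_hub("my-username/kollama2-7b-qlora")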

My code is reproduced below:

import os
import argparse

import torch
import accelerate
import bitsandbytes as bnb
from datasets import load_from_disk
from huggingface_hub import login, HfFolder
from transformers import (
    LlamaForCausalLM,
    LlamaTokenizer,
    LlamaConfig,
    set_seed,
    default_data_collator,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

model_id = "psymon/KoLlama2-7b" # sharded weights
tokenizer = LlamaTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = LlamaForCausalLM.from_pretrained(
    model_id,
    use_cache=False,
    device_map="auto",
    quantization_config=bnb_config,
)

def find_all_linear_names(model):
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, bnb.nn.Linear4bit):
            names = name.split(".")
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

    if "lm_head" in lora_module_names:  # needed for 16-bit
        lora_module_names.remove("lm_head")
    return list(lora_module_names)

def create_peft_model(model, gradient_checkpointing=True, bf16=True):
    from peft import (
        get_peft_model,
        LoraConfig,
        TaskType,
        prepare_model_for_kbit_training,
    )
    from peft.tuners.lora import LoraLayer

    # prepare int-4 model for training
    model = prepare_model_for_kbit_training(
        model, use_gradient_checkpointing=gradient_checkpointing
    )
    if gradient_checkpointing:
        model.gradient_checkpointing_enable()

    # get lora target modules
    modules = find_all_linear_names(model)
    print(f"Found {len(modules)} modules to quantize: {modules}")

    peft_config = LoraConfig(
        r=64,
        lora_alpha=16,
        target_modules=modules,
        lora_dropout=0.1,
        bias="none",
        task_type=TaskType.CAUSAL_LM,
    )

    model = get_peft_model(model, peft_config)

    # pre-process the model by upcasting the layer norms to float32
    for name, module in model.named_modules():
        if isinstance(module, LoraLayer):
            if bf16:
                module = module.to(torch.bfloat16)
        if "norm" in name:
            module = module.to(torch.float32)
        if "lm_head" in name or "embed_tokens" in name:
            if hasattr(module, "weight"):
                if bf16 and module.weight.dtype == torch.float32:
                    module = module.to(torch.bfloat16)

    model.print_trainable_parameters()
    return model

# create the PEFT model
model = create_peft_model(model, gradient_checkpointing=True, bf16=True)

output_dir = XXXXX
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=8,
    bf16=True,  # Use BF16 if available
    learning_rate=5e-5,
    num_train_epochs=3,
    gradient_checkpointing=True,
    # logging strategies
    logging_dir=f"{output_dir}/logs",
    logging_strategy="steps",
    logging_steps=10,
    save_strategy="no",
)

# Create a data collator
data_collator = DataCollatorWithPadding(tokenizer)

# Initialize the Trainer
# (train_dataset / test_dataset are the tokenized train/test splits prepared earlier, not shown here)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    data_collator=data_collator,
)

# Start training
trainer.train()

# merge the LoRA weights into the base model
# (merge_and_unload returns the merged model, so capture the return value)
model = model.merge_and_unload()

model.save_pretrained('model_name')
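
One workaround I’ve been considering, instead of merging the quantized model directly as at the end of the script above, is to save only the LoRA adapter, reload the base model in bf16, merge the adapter into that, and then save. A rough sketch (directory names are placeholders):

from peft import PeftModel

# save just the adapter weights ("adapter_only" is a placeholder directory)
trainer.model.save_pretrained("adapter_only")

# reload the base model in bf16 rather than 4-bit
base_model = LlamaForCausalLM.from_pretrained(
    model_id,  # "psymon/KoLlama2-7b"
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# attach the adapter, merge it into the base weights, and save the merged model
merged = PeftModel.from_pretrained(base_model, "adapter_only")
merged = merged.merge_and_unload()
merged.save_pretrained("merged_model", safe_serialization=True)

Does that approach make sense, or is there a cleaner way to get the fine-tuned safetensors onto the Hub?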