Saving a Fine-tuned Falcon Model

I’m trying to save my fine-tuned model so that it won’t need to redownload the base model every time I want to use it, but nothing I’ve tried has worked so far. I’d really appreciate some help with this.
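Ideally I want to end up with something I can load entirely from a local folder, roughly like this (just a sketch of the goal, using the final-model path defined below):

from transformers import AutoModelForCausalLM, AutoTokenizer

# What I'm hoping for (sketch): load the fine-tuned model and tokenizer
# from disk only, without pulling the base model from the Hub again.
model = AutoModelForCausalLM.from_pretrained("Tiiuae-falcon-7b-instruct/peft-training/final_model/")
tokenizer = AutoTokenizer.from_pretrained("Tiiuae-falcon-7b-instruct/peft-training/final_model/")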

The following parameters and setup are used for the training:

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

hf_model_name = "tiiuae/falcon-7b-instruct"
dir_path = 'Tiiuae-falcon-7b-instruct'
model_name_is = "peft-training"
output_dir = f'{dir_path}/{model_name_is}'
logs_dir = f'{dir_path}/logs'
model_final_path = f"{output_dir}/final_model/"
EPOCHS = 3500  # used as max_steps below, i.e. optimizer steps rather than epochs
LOGS = 1
SAVES = 700
EVALS = EPOCHS // 100  # integer number of steps between evaluations
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
)
model = AutoModelForCausalLM.from_pretrained(
    hf_model_name,
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=False,
)
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.05, # 0.1
    r=64,
    bias="lora_only", # none
    task_type="CAUSAL_LM",
    target_modules=[
        "query_key_value"
    ],
)
model.config.use_cache = False
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
tokenizer = AutoTokenizer.from_pretrained(hf_model_name, trust_remote_code=False)
tokenizer.pad_token = tokenizer.eos_token
training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    optim='paged_adamw_32bit',
    max_steps=EPOCHS,
    save_steps=SAVES,
    logging_steps=LOGS,
    logging_dir=logs_dir,
    eval_steps=EVALS,
    evaluation_strategy="steps",
    fp16=True,
    learning_rate=0.001,
    max_grad_norm=0.3,
    warmup_ratio=0.15, # 0.03
    lr_scheduler_type="constant",
    disable_tqdm=True,
)
model.config.use_cache = False
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=448,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=True,
)
# cast the norm layers to float32 for training stability
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module.to(torch.float32)
train_result = trainer.train()

And I saved it like this:

metrics = train_result.metrics
max_train_samples = len(train_dataset)
metrics["train_samples"] = min(max_train_samples, len(train_dataset))
# save train results
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
# compute evaluation results
metrics = trainer.evaluate()
max_val_samples = len(eval_dataset)
metrics["eval_samples"] = min(max_val_samples, len(eval_dataset))
# save evaluation results
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

model.save_pretrained(model_final_path)
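
One of the save variants I also tried keeps the tokenizer next to the adapter and saves through the trainer; roughly like this (reconstructed from memory, so treat it as a sketch):

# Variant (sketch): let the trainer write the adapter and also store the
# tokenizer files in the same final_model folder.
trainer.save_model(model_final_path)
tokenizer.save_pretrained(model_final_path)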

Now I’ve tried so many different ways to load it, or to load and re-save it in different ways, but nothing seems to work for me. I really need your help here.
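
For reference, one of my loading attempts looked roughly like this (again a sketch from memory; PeftModel comes from the peft library, and the paths are the ones defined above):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Loading attempt (sketch): re-create the 4-bit base model and attach the
# saved LoRA adapter from model_final_path on top of it.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)
base_model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b-instruct",
    quantization_config=bnb_config,
    device_map={"": 0},
    trust_remote_code=False,
)
model = PeftModel.from_pretrained(base_model, model_final_path)
tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct", trust_remote_code=False)

This still goes out to the Hub for the base model unless it is already cached, which is exactly what I’m trying to avoid.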