How can I merge my PEFT adapter with the base model to get a single, complete model?

I have fine-tuned a model with PEFT, but I only got the “extra PEFT weights” (the adapter). How can I merge these extra weights into the original model to produce a new, complete model?
I tried asking an AI, but its method didn’t work.

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model
base_model_name = "LLM4Binary/llm4decompile-1.3b-v1.5"  # replace with your base model
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Load the LoRA adapter
lora_adapter_1 = "/root/autodl-tmp/tuned_model"


model = PeftModel.from_pretrained(model, lora_adapter_1)


model = model.merge_and_unload()


model.save_pretrained("./tuned_llm4decompile-1.3b")

It led to the following warning and error:

(llm4decompile) root@autodl-container-392d4eb87e-ee6d7970:~/autodl-tmp# python merge.py 
Unrecognized keys in `rope_scaling` for 'rope_type'='linear': {'type'}
Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/peft/peft_model.py", line 824, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PeftModelForCausalLM' object has no attribute 'merge_and_unload'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/autodl-tmp/merge.py", line 15, in <module>
    model = model.merge_and_unload()
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/peft/peft_model.py", line 828, in __getattr__
    return getattr(self.base_model, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaForCausalLM' object has no attribute 'merge_and_unload'

Does it simply not support this kind of merging?
It would be so kind of you to help me solve this problem!


So the method should be correct, but it seems that there are cases where an error occurs during merging if the model is quantized.

It seems that the problem can be avoided in some cases by using the merge function on the PEFT side.
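For reference, here is a minimal sketch of that PEFT-side route, assuming the adapter is a mergeable LoRA adapter and the base model is loaded without 8-bit/4-bit quantization (paths and dtype are placeholders):

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model in half precision, without quantization
base = AutoModelForCausalLM.from_pretrained(
    "LLM4Binary/llm4decompile-1.3b-v1.5", torch_dtype=torch.float16
)

# Attach the trained adapter, then fold its weights into the base model
peft_model = PeftModel.from_pretrained(base, "/root/autodl-tmp/tuned_model")
merged = peft_model.merge_and_unload()  # only defined for mergeable adapter types such as LoRA
merged.save_pretrained("./merged_model")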

Thank you so much for your help!

But I haven’t succeeded so far.
The script from Stack Overflow leads to the same error.
As for the following post, “prepare_model_for_int8_training” isn’t defined; I don’t know whether it’s a version issue or something else.

Here is the improved merge script; it leads to the same error.

# merge the base model with the LoRA adapter and save the result

from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import sys
import torch

device_map = {"": 0}
lora_dir = "/root/autodl-tmp/tuned_model"
base_model_name = "LLM4Binary/llm4decompile-1.3b-v1.5"
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
model = AutoPeftModelForCausalLM.from_pretrained(lora_dir, device_map=device_map, torch_dtype=torch.bfloat16)


model = model.merge_and_unload()

output_dir = "./output/merged_model"
model.save_pretrained(output_dir)

Here I tried to circumvent the following warnings by changing my training script. It runs well until the changed part begins.

from transformers import *
from peft import *
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from transformers import default_data_collator, get_linear_schedule_with_warmup
from tqdm import tqdm
from datasets import load_dataset
from tensorboard import * 

device = "cuda"
tokenizer_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
model_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
dataset_name = "asm2c"
text_column = "asm text"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

from datasets import load_dataset

dataset = load_dataset("json", data_files="./traindata.jsonl")
dataset = dataset["train"].train_test_split(0.2)


tokenizer = AutoTokenizer.from_pretrained("LLM4Binary/llm4decompile-1.3b-v1.5")

def preprocess_function(examples):
    inputs = examples["input"]
    outputs = examples["output"]

    # Merge the input and output columns
    merged_texts = [f"{input} {output_text}" for input, output_text in zip(inputs, outputs)]
    
    model_inputs = tokenizer(merged_texts, truncation=True, padding="max_length", max_length=512)
    model_inputs["labels"] = model_inputs["input_ids"].copy()  # 设置labels
    return model_inputs

processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["test"]

peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text="What's the souce code of this asm?",
    tokenizer_name_or_path=model_name_or_path,
)
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)

# creating model
model = AutoModelForCausalLM.from_pretrained("LLM4Binary/llm4decompile-1.3b-v1.5", load_in_8bit=True, torch_dtype=torch.float16, device_map="auto")
model = prepare_model_for_int8_training(model)
peft_model = get_peft_model(model, peft_config)


training_args = TrainingArguments(
    output_dir="./results3",             # 保存模型的目录
    evaluation_strategy="epoch",         # 每个 epoch 进行评估
    save_strategy="epoch",               # 每个 epoch 结束时保存模型              
    learning_rate=2e-5,
    per_device_train_batch_size=4,      # 训练时的batch_size
    per_device_eval_batch_size=8,      # 验证时的batch_size
    logging_steps=10,                    # log 打印的频率
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=False
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    #data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)

trainer.train()
'''
trainer.evaluate(eval_dataset)

# Manually save the model after training
trainer.save_model(output_dir="./tuned_model")  # save the final model to the given directory
tokenizer.save_pretrained(save_directory="./tuned_tokenizer")  # save the tokenizer
'''
lora_adapter = "./lora_adapter"
peft_model.save_pretrained(lora_adapter, save_adapter=True, save_config=True)

model_to_merge = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(model_name_or_path).to("cuda"), lora_adapter)

merged_model = model_to_merge.merge_and_unload()
merged_model.save_pretrained("./merged_model")  # save_pretrained expects an output directory path

It fails with:
Traceback (most recent call last):
  File "/root/autodl-tmp/train_pt.py", line 67, in <module>
    model = prepare_model_for_int8_training(model)
NameError: name 'prepare_model_for_int8_training' is not defined

The direction should be correct…
It seems that the function has been removed. :sweat_smile:

Yes, prepare_model_for_int8_training has been deprecated for quite some time; as of PEFT v0.10.0 it has been removed. Please use prepare_model_for_kbit_training instead.
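In case it helps, a minimal sketch of the replacement call, assuming an 8-bit base model loaded via bitsandbytes (the model name and options are just placeholders):

from transformers import AutoModelForCausalLM
from peft import prepare_model_for_kbit_training

# Load the base model quantized to 8-bit (requires bitsandbytes)
model = AutoModelForCausalLM.from_pretrained(
    "LLM4Binary/llm4decompile-1.3b-v1.5",
    load_in_8bit=True,
    device_map="auto",
)

# Replaces the removed prepare_model_for_int8_training: it prepares the
# quantized model for training (e.g. enables gradient checkpointing and input grads)
model = prepare_model_for_kbit_training(model)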

Thank you :joy:
I should have searched for that warning on GitHub.
I’ll keep trying.


God, prepare_model_for_kbit_training doesn’t support quantized models :scream:

(llm4decompile) root@autodl-container-392d4eb87e-ee6d7970:~/autodl-tmp# python train_pt.py 
2025-02-06 15:17:44.694429: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-06 15:17:44.711058: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
WARNING: All log messages before absl::InitializeLog() is called are written to STDERR
E0000 00:00:1738826264.729778    1236 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738826264.735138    1236 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-06 15:17:44.754843: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1817, in _get_module
    return importlib.import_module("." + module_name, self.__name__)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/importlib/__init__.py", line 127, in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
  File "<frozen importlib._bootstrap>", line 1030, in _gcd_import
  File "<frozen importlib._bootstrap>", line 1007, in _find_and_load
  File "<frozen importlib._bootstrap>", line 986, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 680, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 850, in exec_module
  File "<frozen importlib._bootstrap>", line 228, in _call_with_frames_removed
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/generation/streamers.py", line 231, in <module>
    class AsyncTextIteratorStreamer(TextStreamer):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/generation/streamers.py", line 285, in AsyncTextIteratorStreamer
    self, tokenizer: "AutoTokenizer", skip_prompt: bool = False, timeout: float | None = None, **decode_kwargs
TypeError: unsupported operand type(s) for |: 'type' and 'NoneType'

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/root/autodl-tmp/train_pt.py", line 1, in <module>
    from transformers import *
  File "<frozen importlib._bootstrap>", line 1053, in _handle_fromlist
  File "<frozen importlib._bootstrap>", line 1055, in _handle_fromlist
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1806, in __getattr__
    value = getattr(module, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1805, in __getattr__
    module = self._get_module(self._class_to_module[name])
  File "/root/miniconda3/envs/llm4decompile/lib/python3.9/site-packages/transformers/utils/import_utils.py", line 1819, in _get_module
    raise RuntimeError(
RuntimeError: Failed to import transformers.generation.streamers because of the following error (look up to see its traceback):
unsupported operand type(s) for |: 'type' and 'NoneType'

/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/transformers/training_args.py:1575: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
  warnings.warn(
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Traceback (most recent call last):
  File "/root/autodl-tmp/train_pt.py", line 85, in <module>
    trainer = Trainer(
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/transformers/utils/deprecation.py", line 165, in wrapped_func
    return func(*args, **kwargs)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/transformers/trainer.py", line 553, in __init__
    raise ValueError(
ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details

Here is the updated training script:

from transformers import *
from peft import *
import torch
from datasets import load_dataset
import os
from torch.utils.data import DataLoader
from transformers import default_data_collator, get_linear_schedule_with_warmup
from tqdm import tqdm
from datasets import load_dataset
from tensorboard import * 

device = "cuda"
tokenizer_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
model_name_or_path = "LLM4Binary/llm4decompile-1.3b-v1.5"
dataset_name = "asm2c"
text_column = "asm text"
label_column = "text_label"
max_length = 64
lr = 3e-2
num_epochs = 50
batch_size = 8

from datasets import load_dataset

dataset = load_dataset("json", data_files="./traindata.jsonl")
dataset = dataset["train"].train_test_split(0.2)


tokenizer = AutoTokenizer.from_pretrained("LLM4Binary/llm4decompile-1.3b-v1.5")

def preprocess_function(examples):
    inputs = examples["input"]
    outputs = examples["output"]

    # Merge the input and output columns
    merged_texts = [f"{input} {output_text}" for input, output_text in zip(inputs, outputs)]
    
    model_inputs = tokenizer(merged_texts, truncation=True, padding="max_length", max_length=512)
    model_inputs["labels"] = model_inputs["input_ids"].copy()  # 设置labels
    return model_inputs

processed_datasets = dataset.map(
    preprocess_function,
    batched=True,
    num_proc=1,
    remove_columns=dataset["train"].column_names,
    load_from_cache_file=False,
    desc="Running tokenizer on dataset",
)

train_dataset = processed_datasets["train"]
eval_dataset = processed_datasets["test"]

peft_config = PromptTuningConfig(
    task_type=TaskType.CAUSAL_LM,
    prompt_tuning_init=PromptTuningInit.TEXT,
    num_virtual_tokens=8,
    prompt_tuning_init_text="What's the souce code of this asm?",
    tokenizer_name_or_path=model_name_or_path,
)
checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace(
    "/", "_"
)

# creating model
model = AutoModelForCausalLM.from_pretrained("LLM4Binary/llm4decompile-1.3b-v1.5", load_in_8bit=True, torch_dtype=torch.float16, device_map="auto")
model = prepare_model_for_kbit_training(model)
peft_model = get_peft_model(model, peft_config)


training_args = TrainingArguments(
    output_dir="./results4",             # 保存模型的目录
    evaluation_strategy="epoch",         # 每个 epoch 进行评估
    save_strategy="epoch",               # 每个 epoch 结束时保存模型              
    learning_rate=2e-5,
    per_device_train_batch_size=4,      # 训练时的batch_size
    per_device_eval_batch_size=8,      # 验证时的batch_size
    logging_steps=10,                    # log 打印的频率
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=False
)


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    #data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer, padding=True)
)

trainer.train()
'''
trainer.evaluate(eval_dataset)

# Manually save the model after training
trainer.save_model(output_dir="./tuned_model")  # save the final model to the given directory
tokenizer.save_pretrained(save_directory="./tuned_tokenizer")  # save the tokenizer
'''
lora_adapter = "./lora_adapter"
peft_model.save_pretrained(lora_adapter, save_adapter=True, save_config=True)

model_to_merge = PeftModel.from_pretrained(AutoModelForCausalLM.from_pretrained(model_name_or_path).to("cuda"), lora_adapter)

merged_model = model_to_merge.merge_and_unload()
merged_model.save_pretrained("./merged_model")  # save_pretrained expects an output directory path

The “merge_and_unload” method still seems not to be supported:

(llm4decompile) root@autodl-container-392d4eb87e-ee6d7970:~/autodl-tmp# python merge.py 
PeftModelForCausalLM(
  (base_model): LlamaForCausalLM(
    (model): LlamaModel(
      (embed_tokens): Embedding(32256, 2048)
      (layers): ModuleList(
        (0-23): 24 x LlamaDecoderLayer(
          (self_attn): LlamaAttention(
            (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (k_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (v_proj): Linear(in_features=2048, out_features=2048, bias=False)
            (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          )
          (mlp): LlamaMLP(
            (gate_proj): Linear(in_features=2048, out_features=5504, bias=False)
            (up_proj): Linear(in_features=2048, out_features=5504, bias=False)
            (down_proj): Linear(in_features=5504, out_features=2048, bias=False)
            (act_fn): SiLU()
          )
          (input_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
          (post_attention_layernorm): LlamaRMSNorm((2048,), eps=1e-06)
        )
      )
      (norm): LlamaRMSNorm((2048,), eps=1e-06)
      (rotary_emb): LlamaRotaryEmbedding()
    )
    (lm_head): Linear(in_features=2048, out_features=32256, bias=False)
  )
  (prompt_encoder): ModuleDict(
    (default): PromptEmbedding(
      (embedding): Embedding(8, 2048)
    )
  )
  (word_embeddings): Embedding(32256, 2048)
)
Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 824, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PeftModelForCausalLM' object has no attribute 'merge_and_unload'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/autodl-tmp/merge.py", line 15, in <module>
    model = model.merge_and_unload()
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 828, in __getattr__
    return getattr(self.base_model, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaForCausalLM' object has no attribute 'merge_and_unload'

TypeError: unsupported operand type(s) for |: ‘type’ and ‘NoneType’

I think it’s probably because of Python 3.9…
I know because I’m also using 3.9.:sob:

Ah, I noticed that and switched to Python 3.10; I copied the wrong log here by mistake.
What matters is the errors below :sob:


I think this is the situation now…

or

I tried many times and found one thing:
when I look at the lora_weights that other people merge, they look like this:

But I don’t have that “bin” model; I just have a .safetensors file. Maybe that’s the key to these errors?
After training, it directly outputs a .safetensors file, and I don’t know how to merge it into the base model…… :tired_face:


Once you’ve loaded them, a safetensors file and a bin file are basically the same. The bin file is inherently risky because it’s a Python pickle, in other words a pickled object :cucumber:, so the safetensors format is essentially a version with as much of the dangerous parts removed as possible.

Since you didn’t trip up at the loading stage, I think the cause is something that would happen even with safetensors.
Sometimes trying it once with another small model can be a shortcut out of the maze.
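If it helps, here is a small sketch for peeking inside the .safetensors adapter file (the path is a placeholder for your adapter folder). A prompt-tuning adapter only contains prompt-embedding weights, while a LoRA adapter has lora_A / lora_B tensors:

from safetensors.torch import load_file

# Inspect the adapter weights without loading the base model
state_dict = load_file("/root/autodl-tmp/tuned_model/adapter_model.safetensors")
for name, tensor in state_dict.items():
    print(name, tuple(tensor.shape))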


Or is that itself a LoRA? Maybe you need a base model for the base model; that is, repeat the merge twice.
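A rough sketch of that two-step idea, assuming both levels are mergeable LoRA adapters (the names are placeholders, not the actual repos in this thread):

import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

# Step 1: merge the first adapter into its own base model
base = AutoModelForCausalLM.from_pretrained("some/base-model", torch_dtype=torch.float16)
stage1 = PeftModel.from_pretrained(base, "some/first-adapter").merge_and_unload()

# Step 2: merge the second adapter into the result of step 1
stage2 = PeftModel.from_pretrained(stage1, "some/second-adapter").merge_and_unload()
stage2.save_pretrained("./fully_merged")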

Well, I couldn’t fully understand “a base model for the base model”; what does that mean?
I searched for other ways out, but they all failed.

This one offers a script, but I get “TypeError: not a string” on “tokenizer = AutoTokenizer.from_pretrained(”.
It really is a string!

from peft import PeftModel
from transformers import *
import torch
"""
使用该脚本,将lora的权重合并大base model中
"""
import os

def merge_lora_to_base_model():
    model_name_or_path = 'LLM4Binary/llm4decompile-1.3b-v1.5'
    adapter_name_or_path = "P1sc3s007/llm4decompile-pt"
    save_path = './merged'

    config = AutoConfig.from_pretrained(model_name_or_path)
    tokenizer = LlamaTokenizer.from_pretrained(
        adapter_name_or_path,
        trust_remote_code=True,
        # llama does not support the fast tokenizer
        use_fast=False  # if config.model_type == 'llama' else True
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        # device_map='auto',
        device_map={'': 'cpu'}
    )
    model = PeftModel.from_pretrained(model, adapter_name_or_path, device_map={'': 'cpu'})
    model = model.merge_and_unload()

    tokenizer.save_pretrained(save_path)
    model.save_pretrained(save_path)

if __name__ == '__main__':
    merge_lora_to_base_model()

I tried to solve it and found this:

and got this error:

TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

This one

suggests “missing this file ( tokenizer.model )”

and I found “Multi-LoRA in PEFT is tricky and the current implementation does not work reliably in all cases.” in

:mending_heart:


“a base model for the base model” , what does that mean?

This itself is LoRA, and it does not seem to be the model itself. The base model is as follows.

Yeah,
“LLM4Binary/llm4decompile-1.3b-v1.5” is the original model, the base model.
“P1sc3s007/llm4decompile-pt” is the LoRA (in fact I used prompt tuning), i.e. the tuned adapter. “/root/autodl-tmp/tuned_model” is its local folder, and “P1sc3s007/llm4decompile-pt” is what I uploaded to Hugging Face.
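For a quick sanity check, the adapter config records which PEFT method produced it; a minimal sketch using the repo name above (note that merge_and_unload is only available for adapter types like LoRA, not for prompt tuning):

from peft import PeftConfig

config = PeftConfig.from_pretrained("P1sc3s007/llm4decompile-pt")
print(config.peft_type)                 # e.g. PROMPT_TUNING or LORA
print(config.base_model_name_or_path)   # the base model it was trained on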


Then, it should be okay.

Or rather, it seems that LLM4Binary/llm4decompile-1.3b-v1.5 is not quantized. :thinking: The reason you are getting quantization errors is either because quantization is being applied at load time, or because your LoRA is quantized. As for how to deal with the quantization… I think you can solve simple cases just by specifying torch_dtype.

#model = PeftModel.from_pretrained(model, adapter_name_or_path, device_map={'': 'cpu'})
model = PeftModel.from_pretrained(model, adapter_name_or_path, device_map={'': 'cpu'}, torch_dtype=torch.float16)

It goes back here :sob:

AttributeError: ‘PeftModelForCausalLM’ object has no attribute ‘merge_and_unload’
AttributeError: ‘LlamaForCausalLM’ object has no attribute ‘merge_and_unload’

even with torch_dtype=torch.float16

import torch
from peft import PeftModel
import transformers
import os, time
import tempfile
from transformers import *


BASE_MODEL = "LLM4Binary/llm4decompile-1.3b-v1.5"
LORA_WEIGHTS = "P1sc3s007/llm4decompile-pt"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 
)
    
model = PeftModel.from_pretrained(
    model, 
    LORA_WEIGHTS, 
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 

)

model = model.merge_and_unload()
model.save_pretrained("./merged_llm")

It seems to go well until model = model.merge_and_unload()

(llm4decompile) root@autodl-container-392d4eb87e-ee6d7970:~/autodl-tmp# HF_ENDPOINT=https://hf-mirror.com python merge3.py 
loading file tokenizer.model from cache at None
loading file tokenizer.json from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/tokenizer_config.json
loading file chat_template.jinja from cache at None
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/config.json
Model config LlamaConfig {
  "_name_or_path": "LLM4Binary/llm4decompile-1.3b-v1.5",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 32013,
  "eos_token_id": 32014,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 5504,
  "max_position_embeddings": 16384,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 16,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": {
    "factor": 4.0,
    "rope_type": "linear",
    "type": "linear"
  },
  "rope_theta": 100000,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.48.2",
  "use_cache": false,
  "vocab_size": 32256
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/model.safetensors
Instantiating LlamaForCausalLM model under default dtype torch.float16.
Generate config GenerationConfig {
  "bos_token_id": 32013,
  "eos_token_id": 32014,
  "use_cache": false
}

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at LLM4Binary/llm4decompile-1.3b-v1.5.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--LLM4Binary--llm4decompile-1.3b-v1.5/snapshots/06291f5a44ffb97e437ff5a8e01918ae84a10ee4/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 32013,
  "eos_token_id": 32014
}

adapter_config.json: 524B [00:00, 2.30MB/s]                                                                                                                                                                        
adapter_model.safetensors: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 65.7k/65.7k [00:00<00:00, 134kB/s]
Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 824, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PeftModelForCausalLM' object has no attribute 'merge_and_unload'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/autodl-tmp/merge3.py", line 31, in <module>
    model = model.merge_and_unload()
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 828, in __getattr__
    return getattr(self.base_model, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaForCausalLM' object has no attribute 'merge_and_unload'

That’s probably the right way to do it. On my end, the merged model was saved using the code below, so there must be something wrong in the details.

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftConfig, get_peft_model
import torch

model_id = "unsloth/Qwen2.5-Coder-1.5B-Instruct-bnb-4bit"
lora_id = "ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora"
model_dir = "merged_model"

peft_config = PeftConfig.from_pretrained(lora_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = get_peft_model(model, peft_config)
model = model.merge_and_unload()
model.save_pretrained(model_dir)
tokenizer.save_pretrained(model_dir)

The same error, but it seems close. I may have to check the PEFT version or something; I’m still trying.

(llm4decompile) root@autodl-container-392d4eb87e-ee6d7970:~/autodl-tmp# HF_ENDPOINT=https://hf-mirror.com python merge4.py 
Traceback (most recent call last):
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 824, in __getattr__
    return super().__getattr__(name)  # defer to nn.Module's logic
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'PeftModelForCausalLM' object has no attribute 'merge_and_unload'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/root/autodl-tmp/merge4.py", line 13, in <module>
    model = model.merge_and_unload()
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/peft/peft_model.py", line 828, in __getattr__
    return getattr(self.base_model, name)
  File "/root/miniconda3/envs/llm4decompile/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1695, in __getattr__
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
AttributeError: 'LlamaForCausalLM' object has no attribute 'merge_and_unload'
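For reference, a quick way to confirm which versions are actually installed in the environment (a generic check, not specific to this error):

import peft
import transformers
import torch

print("peft:", peft.__version__)
print("transformers:", transformers.__version__)
print("torch:", torch.__version__)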