from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType
import torch
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_use_double_quant=True,
bnb_4bit_compute_dtype= torch.float16
)
model = AutoModelForSequenceClassification.from_pretrained(
"distilbert-base-uncased" ,
quantization_config = quantization_config
)
lora_config = LoraConfig(
r=8,
lora_alpha=8,
target_modules=["q_lin", "k_lin", "v_lin", "out_lin"],
lora_dropout=0.1,
task_type=TaskType.SEQ_CLS,
bias="none"
)
model = get_peft_model(model, lora_config)
i tried to quantize and fine-tune distilbert-base-uncased using BnB and peft but I got this error message.
low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-30-df44ef690a88> in <cell line: 26>()
24 )
25
---> 26 model = get_peft_model(model, lora_config)
5 frames
/usr/local/lib/python3.10/dist-packages/peft/mapping.py in get_peft_model(model, peft_config, adapter_name, mixed, autocast_adapter_dtype, revision)
191 if peft_config.is_prompt_learning:
192 peft_config = _prepare_prompt_learning_config(peft_config, model_config)
--> 193 return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
194 model, peft_config, adapter_name=adapter_name, autocast_adapter_dtype=autocast_adapter_dtype
195 )
/usr/local/lib/python3.10/dist-packages/peft/peft_model.py in __init__(self, model, peft_config, adapter_name, **kwargs)
1396
1397 # to make sure classifier layer is trainable; this may add a new ModulesToSaveWrapper
-> 1398 _set_trainable(self, adapter_name)
1399
1400 def add_adapter(self, adapter_name: str, peft_config: PeftConfig) -> None:
/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in _set_trainable(model, adapter_name)
401 target.set_adapter(target.active_adapter)
402 else:
--> 403 new_module = ModulesToSaveWrapper(target, adapter_name)
404 new_module.set_adapter(adapter_name)
405 setattr(parent, target_name, new_module)
/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in __init__(self, module_to_save, adapter_name)
197 self._active_adapter = adapter_name
198 self._disable_adapters = False
--> 199 self.update(adapter_name)
200 self.check_module()
201
/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in update(self, adapter_name)
261 self.original_module.requires_grad_(False)
262 if adapter_name == self.active_adapter:
--> 263 self.modules_to_save[adapter_name].requires_grad_(True)
264
265 def _create_new_hook(self, old_hook):
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in requires_grad_(self, requires_grad)
2885 """
2886 for p in self.parameters():
-> 2887 p.requires_grad_(requires_grad)
2888 return self
2889
RuntimeError: only Tensors of floating point dtype can require gradients
it appears only when choosing task_type = TaskType.SEQ_CLS or TOKEN_CLS inside LoraConfig.
if I choose any other type like QUESTION_ANS or CAUSAL_LM no error occurs.