Error while fine-tuning distilbert model

from transformers import AutoModelForSequenceClassification
from peft import LoraConfig, get_peft_model, TaskType
import torch

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype= torch.float16
)

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased" ,
    quantization_config = quantization_config
  )

lora_config = LoraConfig(
    r=8,
    lora_alpha=8,
    target_modules=["q_lin", "k_lin", "v_lin", "out_lin"],
    lora_dropout=0.1,
    task_type=TaskType.SEQ_CLS,
    bias="none"
)

model = get_peft_model(model, lora_config)

i tried to quantize and fine-tune distilbert-base-uncased using BnB and peft but I got this error message.

low_cpu_mem_usage` was None, now default to True since model is quantized.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-30-df44ef690a88> in <cell line: 26>()
     24 )
     25 
---> 26 model = get_peft_model(model, lora_config)

5 frames
/usr/local/lib/python3.10/dist-packages/peft/mapping.py in get_peft_model(model, peft_config, adapter_name, mixed, autocast_adapter_dtype, revision)
    191     if peft_config.is_prompt_learning:
    192         peft_config = _prepare_prompt_learning_config(peft_config, model_config)
--> 193     return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](
    194         model, peft_config, adapter_name=adapter_name, autocast_adapter_dtype=autocast_adapter_dtype
    195     )

/usr/local/lib/python3.10/dist-packages/peft/peft_model.py in __init__(self, model, peft_config, adapter_name, **kwargs)
   1396 
   1397         # to make sure classifier layer is trainable; this may add a new ModulesToSaveWrapper
-> 1398         _set_trainable(self, adapter_name)
   1399 
   1400     def add_adapter(self, adapter_name: str, peft_config: PeftConfig) -> None:

/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in _set_trainable(model, adapter_name)
    401                 target.set_adapter(target.active_adapter)
    402             else:
--> 403                 new_module = ModulesToSaveWrapper(target, adapter_name)
    404                 new_module.set_adapter(adapter_name)
    405                 setattr(parent, target_name, new_module)

/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in __init__(self, module_to_save, adapter_name)
    197         self._active_adapter = adapter_name
    198         self._disable_adapters = False
--> 199         self.update(adapter_name)
    200         self.check_module()
    201 

/usr/local/lib/python3.10/dist-packages/peft/utils/other.py in update(self, adapter_name)
    261         self.original_module.requires_grad_(False)
    262         if adapter_name == self.active_adapter:
--> 263             self.modules_to_save[adapter_name].requires_grad_(True)
    264 
    265     def _create_new_hook(self, old_hook):

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in requires_grad_(self, requires_grad)
   2885         """
   2886         for p in self.parameters():
-> 2887             p.requires_grad_(requires_grad)
   2888         return self
   2889 

RuntimeError: only Tensors of floating point dtype can require gradients

it appears only when choosing task_type = TaskType.SEQ_CLS or TOKEN_CLS inside LoraConfig.
if I choose any other type like QUESTION_ANS or CAUSAL_LM no error occurs.

1 Like

Similar to this case.