Hi all,
I fine-tuned a quantized roberta-base classification model using PEFT + LoRA. Training runs fine, and I save the adapter. Here is my code:
from datasets import load_dataset
import evaluate
from peft import (
    LoraConfig,
    TaskType,
    get_peft_model,
    prepare_model_for_kbit_training
)
import torch
from transformers import (
    AutoTokenizer,
    DataCollatorWithPadding,
    AutoModelForSequenceClassification,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments
)
checkpoint = "dstefa/roberta-base_topic_classification_nyt_news"
# create quantization object
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=["classifier"]
)
# num_labels, id2label and label2id come from my dataset preprocessing (omitted here)
base_model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
    quantization_config=quantization_config
)
# preprocess the quantized model for training
model = prepare_model_for_kbit_training(base_model)
# create LoRA config object
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,  # set to False for training
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    modules_to_save=["classifier.dense", "classifier.out_proj"],
)
# create a trainable PeftModel
final_model = get_peft_model(model, lora_config)
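# Optional sanity check: PEFT's built-in helper lists which parameters are trainable
# (the LoRA layers plus the classifier modules listed in modules_to_save)
final_model.print_trainable_parameters()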
final_training_args = TrainingArguments(
    output_dir="/content/drive/MyDrive/Projects/new-topic-classifier/checkpoint/",
    num_train_epochs=2,
    # eval_strategy="epoch",
    # save_strategy="epoch",
    eval_strategy="steps",
    eval_steps=10000,
    save_strategy="steps",
    save_steps=10000,
    save_total_limit=3,
    load_best_model_at_end=False,
    logging_strategy="steps",
    logging_steps=50,
    logging_first_step=True,
    fp16=True,
    run_name="final_topic_classifier_run",
    report_to="wandb",  # W&B is active
    push_to_hub=True,
    hub_model_id="####/New-topic-classifier-training-model-storage",
    hub_strategy="checkpoint",
)
# train_dataset, val_dataset, tokenizer, data_collator and compute_metrics
# are defined earlier (omitted here for brevity)
final_trainer = Trainer(
    model=final_model,
    args=final_training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)
final_trainer.train()
# Save the adapter model after training
adapter_output_dir = "/content/drive/MyDrive/Projects/new-topic-classifier/final_adapter"
final_trainer.model.save_pretrained(adapter_output_dir)
# Push the adapter model to Hugging Face Hub
adapter_repo_name = "XXXX/agnews_classifier_naive_model_adapters"
final_trainer.model.push_to_hub(adapter_repo_name)
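For reference, here is how I can inspect which keys ended up in the saved adapter (a minimal sketch, assuming PEFT wrote its default adapter_model.safetensors into the adapter directory):
from safetensors.torch import load_file

# load the saved adapter weights and list any classifier-related keys
adapter_state = load_file(f"{adapter_output_dir}/adapter_model.safetensors")
print([key for key in adapter_state if "classifier" in key])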
But when I try to use it for inference like this:
## inference
checkpoint = "dstefa/roberta-base_topic_classification_nyt_news"
adapter_repo_name = "XXXX/agnews_classifier_naive_model_adapters"
# create quantization object
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=["classifier"]
)
base_model = AutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,
    quantization_config=quantization_config
)
base_model.load_adapter(adapter_repo_name)
I got an error:
KeyError: 'classifier.dense.weight'
I also tried loading the model with the adapter another way, using PeftModel.from_pretrained, but it raised the same error.
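Written out fully, that second attempt is just this (using the same quantized base_model as in the inference snippet above):
from peft import PeftModel

# wrap the quantized base model with the trained adapter for inference
inference_model = PeftModel.from_pretrained(base_model, adapter_repo_name)
inference_model.eval()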
How should I properly load an adapter for inference into a quantized sequence classification model? Is the issue related to a config setting or a training argument?
Thank you in advance for your help.