I am attempting to fine-tune a Llama 2-based model for sequence classification with LoftQ quantization, but an error occurs as soon as training starts.
How can I get the model training to run successfully?
[Model] "elyza/ELYZA-japanese-Llama-2-7b"
[Error]
RuntimeError Traceback (most recent call last)
<ipython-input-14-b0ee58b0a570> in <cell line: 31>()
29 )
30
---> 31 trainer.train()
---
/usr/local/lib/python3.10/dist-packages/torch/nn/modules/linear.py in forward(self, input)
112
113 def forward(self, input: Tensor) -> Tensor:
--> 114 return F.linear(input, self.weight, self.bias)
115
116 def extra_repr(self) -> str:
RuntimeError: mat1 and mat2 shapes cannot be multiplied (22560x4096 and 1x1)
The code to reproduce the error is as follows:
# Install the following libraries
# pip install torch transformers datasets bitsandbytes accelerate peft scikit-learn
import torch
from datasets import load_dataset
from transformers import AutoTokenizer
from accelerate import Accelerator
from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType, prepare_model_for_kbit_training, LoftQConfig
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Load dataset
dataset = load_dataset("yelp_review_full", split="train[:1%]")
dataset = dataset.train_test_split(test_size=0.2)
dataset = dataset.rename_column("label", "labels")
model_name = "elyza/ELYZA-japanese-Llama-2-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
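# The Llama 2 tokenizer has no pad token by default, so reuse unk and pad on the right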
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"
def tokenize_function(examples):
    return tokenizer(examples["text"], padding=True, truncation=True, max_length=4096, return_tensors='pt')
tokenized_datasets = dataset.map(tokenize_function, batched=True)
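# 4-bit NF4 quantization settings used when loading the base model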
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
loftq_config = LoftQConfig(loftq_bits=4)
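# LoRA on the attention projections, with adapter weights initialized via LoftQ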
lora_config = LoraConfig(
    init_lora_weights="loftq",
    loftq_config=loftq_config,
    r=16,
    lora_alpha=8,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_CLS
)
# Load the 4-bit quantized base model and apply LoRA with LoftQ initialization
accelerator = Accelerator()
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=5, quantization_config=bnb_config, device_map=device)
model = get_peft_model(model, lora_config)
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted', zero_division=0)
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
print(model)
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    save_total_limit=1,
    dataloader_pin_memory=False,
    evaluation_strategy="steps",
    logging_steps=50,
    logging_dir='./logs'
)
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
    data_collator=data_collator
)
trainer.train()  # The error occurs here
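For reference, here is a small diagnostic I can run right after get_peft_model (a minimal sketch that uses only the public named_modules() API; I am assuming the 1x1 operand in the error comes from some module's weight attribute). It lists any module whose weight has collapsed to a single element:

# Diagnostic sketch: find modules whose weight has collapsed to a single element,
# which would match the 1x1 operand reported in the RuntimeError.
for name, module in model.named_modules():
    weight = getattr(module, "weight", None)
    if isinstance(weight, torch.Tensor) and weight.numel() <= 1:
        print(name, type(module).__name__, tuple(weight.shape))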