I tried this:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
model_id = "microsoft/phi-2"
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
# Configure quantization
bnb_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=torch.float16,
bnb_4bit_use_double_quant=False
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
model_id,
quantization_config=bnb_config,
device_map="auto",
trust_remote_code=True
)
from datasets import load_dataset
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
def preprocess_function(examples):
return tokenizer(examples["text"], truncation=True, max_length=512)
tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset.column_names)
from peft import LoraConfig, get_peft_model
peft_config = LoraConfig(
r=16,
lora_alpha=32,
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM"
)
model = get_peft_model(model, peft_config)
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
output_dir="./phi2-wikitext",
learning_rate=2e-4,
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
num_train_epochs=3,
weight_decay=0.01,
logging_steps=10,
save_steps=100,
save_total_limit=3,
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=tokenized_dataset,
tokenizer=tokenizer,
)
print(trainer.compute_loss)
trainer.train()
but I keep getting an error that says the compute_loss method did not return loss:
Traceback (most recent call last):
File "train_phi.py", line 70, in <module>
trainer.train()
File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 1885, in train
return inner_training_loop(
File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 2216, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 3238, in training_step
loss = self.compute_loss(model, inputs)
File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 3282, in compute_loss
raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.