How to finetune Microsoft Phi-2 on the WikiText-2 dataset

I tried this:

from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "microsoft/phi-2"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Configure quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

from datasets import load_dataset

# Load the WikiText-2 (raw) training split
dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")

# Tokenize the raw text, truncating to 512 tokens
def preprocess_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=512)

tokenized_dataset = dataset.map(preprocess_function, batched=True, remove_columns=dataset.column_names)

from peft import LoraConfig, get_peft_model

# LoRA configuration for parameter-efficient finetuning
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

model = get_peft_model(model, peft_config)

from transformers import TrainingArguments, Trainer

# Training hyperparameters
training_args = TrainingArguments(
    output_dir="./phi2-wikitext",
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=10,
    save_steps=100,
    save_total_limit=3,
)

# Set up the Trainer (no data_collator passed, so the default collator is used)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
)

# Debugging: inspect the Trainer's compute_loss method
print(trainer.compute_loss)

trainer.train()

but I keep getting an error saying that the compute_loss method did not return a loss:

Traceback (most recent call last):
  File "train_phi.py", line 70, in <module>
    trainer.train()
  File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 1885, in train
    return inner_training_loop(
  File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 2216, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
  File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 3238, in training_step
    loss = self.compute_loss(model, inputs)
  File "/home/mayank/miniconda3/envs/fedml-pip/lib/python3.8/site-packages/transformers/trainer.py", line 3282, in compute_loss
    raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.

Please check this: ValueError: The model did not return a loss from the inputs

I see, thank you for the link. But I didn't have to add a labels column when I finetuned LLaMA-2.

Am I missing something?
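
Edit: based on the linked question, my guess is that the Trainer expects a labels key in each batch, and my tokenized dataset only has input_ids and attention_mask. Below is a rough sketch of what I think is being suggested, using DataCollatorForLanguageModeling with mlm=False so that labels are derived from input_ids at collation time (I have not verified yet that this is the intended fix for Phi-2):

from transformers import DataCollatorForLanguageModeling

# Causal-LM collator: pads each batch and copies input_ids into labels,
# with padding positions masked to -100 so they are ignored by the loss
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

The alternative I can think of would be to copy input_ids into a labels column inside preprocess_function, but the collator route seems cleaner since it also handles padding. Is that the intended fix, or does the labels requirement refer to something else?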