Hello, I have a problem when I fine-tune “meta-llama/Llama-2-7b-hf” for a classification task. Here is my code:
from datasets import load_dataset
from transformers import DataCollatorWithPadding
from transformers import LlamaTokenizer, LlamaForSequenceClassification
import torch
from peft import get_peft_model, LoraConfig, TaskType
from transformers import Trainer, TrainingArguments
import evaluate
import numpy as np
class WeightedCELossTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        # debug prints to check where the tensors end up
        print(labels.device)
        print(model.device)
        print(labels)
        print(inputs)
        outputs = model(**inputs)
        print(outputs)
        logits = outputs.get("logits")
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
def compute_metrics(eval_pred):
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    f1_metric = evaluate.load("f1")
    accuracy_metric = evaluate.load("accuracy")
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = precision_metric.compute(predictions=predictions, references=labels)["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels)["recall"]
    f1 = f1_metric.compute(predictions=predictions, references=labels)["f1"]
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]
    return {"precision": precision, "recall": recall, "f1-score": f1, "accuracy": accuracy}
MAX_LEN = 512
llama_checkpoint = "meta-llama/Llama-2-7b-hf"
dataset = load_dataset("mehdiiraqui/twitter_disaster")
data = dataset["train"].train_test_split(train_size=0.8, seed=42)
data["val"] = data.pop("test")
data["test"] = dataset["test"]
col_to_delete = ["id", "keyword", "location", "text"] # Remove the undesired columns
pos_weights = len(data['train'].to_pandas()) / (2 * data['train'].to_pandas().target.value_counts()[1])
neg_weights = len(data['train'].to_pandas()) / (2 * data['train'].to_pandas().target.value_counts()[0])
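# NOTE: I haven't wired these class weights into WeightedCELossTrainer yet; a rough
# sketch of how they could be passed to the loss (just an idea, the weight tensor
# would need to live on the logits' device and match its dtype):
# loss_fct = torch.nn.CrossEntropyLoss(
#     weight=torch.tensor([neg_weights, pos_weights], device=logits.device, dtype=logits.dtype)
# )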
llama_tokenizer = LlamaTokenizer.from_pretrained(llama_checkpoint)
llama_tokenizer.pad_token_id = llama_tokenizer.eos_token_id
llama_tokenizer.pad_token = llama_tokenizer.eos_token
def llama_preprocess_function(examples):
    return llama_tokenizer(examples["text"], truncation=True, padding="max_length", max_length=MAX_LEN)
llama_tokenized_datasets = data.map(llama_preprocess_function, batched=True, remove_columns=col_to_delete)
llama_tokenized_datasets = llama_tokenized_datasets.rename_column("target", "label")
llama_tokenized_datasets.set_format("torch")
llama_data_collator = DataCollatorWithPadding(tokenizer=llama_tokenizer)
llama_model = LlamaForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=llama_checkpoint,
    num_labels=2,
    device_map="auto",
    offload_folder="offload",
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
llama_model.config.pad_token_id = llama_model.config.eos_token_id
llama_peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=4,
    lora_alpha=16,
    lora_dropout=0.5,
    bias="none",
    target_modules=[
        "q_proj",
        "v_proj",
    ],
)
llama_model = get_peft_model(llama_model, llama_peft_config)
llama_model.print_trainable_parameters()
llama_model.cuda()
lr = 1e-4
batch_size = 2
num_epochs = 3
training_args = TrainingArguments(
    output_dir="llama-lora-token-classification",
    learning_rate=lr,
    lr_scheduler_type="constant",
    warmup_ratio=0.1,
    max_grad_norm=0.3,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epochs,
    weight_decay=0.001,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    report_to="none",
    fp16=True,
    gradient_checkpointing=True,
)
llama_trainer = WeightedCELossTrainer(
    model=llama_model,
    args=training_args,
    train_dataset=llama_tokenized_datasets["train"],
    eval_dataset=llama_tokenized_datasets["val"],
    data_collator=llama_data_collator,
    compute_metrics=compute_metrics,
)
llama_trainer.train()
I got the following RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:1 and cuda:0! (when checking argument for argument target in method wrapper_CUDA_nll_loss_forward). Can anyone give me some hints on how to solve this problem?
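My current guess is that device_map="auto" shards the model across cuda:0 and cuda:1, so when compute_loss calls CrossEntropyLoss the labels sit on a different GPU than the logits (the error mentions the target argument of nll_loss). A minimal sketch of the workaround I'm considering, assuming the mismatch really is between labels and logits, would be to move the labels onto the logits' device before computing the loss:

class WeightedCELossTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.get("logits")
        # with device_map="auto" the classification head can sit on a different GPU
        # than the inputs, so align the targets with the logits before the loss
        labels = labels.to(logits.device)
        loss_fct = torch.nn.CrossEntropyLoss()
        loss = loss_fct(logits.view(-1, self.model.config.num_labels), labels.view(-1))
        return (loss, outputs) if return_outputs else loss

I am also not sure whether calling llama_model.cuda() after loading with device_map="auto" is part of the problem, or whether I should drop that line and let the automatic placement handle the devices.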