Hi Alan,
Thanks so much for your response. I'm running into some issues while setting everything up for fine-tuning.
I took the code you posted above and embedded it into my current script:
import torch
import torch.nn as nn

# Define the device: use GPU if available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Calculate class counts
class_counts = X_train['labels'].value_counts()  # Replace with your column name
total_samples = class_counts.sum()

# Compute weights (inverse frequency)
class_weights = total_samples / (len(class_counts) * class_counts)

# Convert weights to a PyTorch tensor
class_weights = torch.tensor(class_weights.values, dtype=torch.float).to(device)
print("Class Weights:", class_weights)

# Weighted cross-entropy loss
loss_fn = nn.CrossEntropyLoss(weight=class_weights)
from transformers import Trainer

class CustomLossTrainer(Trainer):
    def __init__(self, *args, loss_fn=None, **kwargs):
        super().__init__(*args, **kwargs)
        if loss_fn is None:
            raise ValueError("You must supply a custom loss function.")
        self.loss_fn = loss_fn

    def compute_loss(self, model, inputs, return_outputs=False):
        # Extract labels from inputs (they should be under the key "labels")
        labels = inputs.get("labels")
        outputs = model(**inputs)  # The model should output a dictionary with "logits"
        logits = outputs.get("logits")
        loss = self.loss_fn(logits, labels)
        return (loss, outputs) if return_outputs else loss
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    num_labels=3,
)
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id
def tokenize_function(example):
    return tokenizer(example["text"], truncation=True, padding='longest')

# Map the tokenization function over the dataset
tokenized_train_data = train_data.map(tokenize_function, batched=True)
tokenized_eval_data = eval_data.map(tokenize_function, batched=True)
tokenized_train_data = tokenized_train_data.remove_columns(["text"])
tokenized_eval_data = tokenized_eval_data.remove_columns(["text"])
# Data collator for dynamic padding
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
from peft import LoraConfig, get_peft_model

modules = ['q_proj', 'v_proj', 'k_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj']
output_dir = "llama-3.1-fine-tuned-model-weighted-loss"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=5,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=2e-5,
    weight_decay=0.01,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    evaluation_strategy="epoch",
    logging_steps=50,
    fp16=True,
    max_grad_norm=0.3,
)
trainer = CustomLossTrainer(
    model=model,
    args=training_arguments,
    train_dataset=tokenized_train_data,
    eval_dataset=tokenized_eval_data,
    tokenizer=tokenizer,
    data_collator=data_collator,
    loss_fn=loss_fn,
)
trainer.train()
The error I’m getting:
ValueError: Unable to create tensor, you should probably activate truncation and/or padding with 'padding=True' 'truncation=True' to have batched tensors with the same length. Perhaps your features (`labels` in this case) have excessive nesting (inputs type `list` where type `int` is expected).
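In case it helps narrow things down, here is a small diagnostic sketch I can run just before trainer.train() (it only assumes the tokenized_train_data and data_collator defined above, nothing new) to see what the collator actually receives and whether the labels column is the problem:

# Inspect a couple of tokenized examples before they ever reach the Trainer
sample = [tokenized_train_data[i] for i in range(2)]
for ex in sample:
    print({k: type(v).__name__ for k, v in ex.items()})
    print("labels value:", ex["labels"])

# Collating the same examples by hand shows whether the padding/tensor step is what fails
batch = data_collator(sample)
print({k: tuple(v.shape) for k, v in batch.items()})

If collating by hand raises the same ValueError, I assume the issue is in the features themselves rather than in the Trainer setup, but I'm not sure where to go from there.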
Would you be able to help me please?
Thanks.