I have sentiment classification data (with ‘Positive’ and ‘Negative’ as the target labels). I fine-tuned the Llama2 model on it, but at inference time it predicts only one class, regardless of whether the sentence is positive or negative.
I am passing the tokenized version of prompts like the following to the method below:

Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is the sentiment of this sentence? Positive\nNegative\n\nInput:\nThe boy sitting over there is happy.\n\n### Response:\n Positive
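For completeness, each example is tokenized roughly like this before training (a sketch of my setup; the base model name, max_length and the "text" column are assumptions, not my exact preprocessing code):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token = tokenizer.eos_token  # Llama2 has no pad token by default

def tokenize_example(example):
    # example["text"] is the full prompt shown above, including the gold label
    return tokenizer(example["text"], truncation=True, max_length=512)

dataset = dataset.map(tokenize_example, remove_columns=dataset.column_names)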
import os

import torch
from peft import get_peft_model, prepare_model_for_kbit_training
from transformers import DataCollatorForLanguageModeling, Trainer, TrainingArguments

# find_all_linear_names, create_peft_config and print_trainable_parameters are
# helper functions defined elsewhere in my code.

def fine_tune(model,
              tokenizer,
              dataset,
              lora_r,
              lora_alpha,
              lora_dropout,
              bias,
              task_type,
              per_device_train_batch_size,
              gradient_accumulation_steps,
              warmup_steps,
              max_steps,
              learning_rate,
              fp16,
              logging_steps,
              output_dir,
              optim):
    """
    Prepares and fine-tunes the pre-trained model.

    :param model: Pre-trained Hugging Face model
    :param tokenizer: Model tokenizer
    :param dataset: Preprocessed (tokenized) training dataset
    """
    # Enable gradient checkpointing to reduce memory usage during fine-tuning
    model.gradient_checkpointing_enable()

    # Prepare the quantized model for k-bit (QLoRA) training
    model = prepare_model_for_kbit_training(model)

    # Get the names of the linear modules to target with LoRA
    target_modules = find_all_linear_names(model)

    # Create a PEFT configuration for these modules and wrap the model with PEFT
    peft_config = create_peft_config(lora_r, lora_alpha, target_modules, lora_dropout, bias, task_type)
    model = get_peft_model(model, peft_config)

    # Print information about the percentage of trainable parameters
    print_trainable_parameters(model)

    # Training parameters
    trainer = Trainer(
        model=model,
        train_dataset=dataset,
        args=TrainingArguments(
            per_device_train_batch_size=per_device_train_batch_size,
            gradient_accumulation_steps=gradient_accumulation_steps,
            warmup_steps=warmup_steps,
            max_steps=max_steps,
            learning_rate=learning_rate,
            fp16=fp16,
            logging_steps=logging_steps,
            output_dir=output_dir,
            optim=optim,
        ),
        data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )

    # Disable the KV cache during training (it conflicts with gradient checkpointing)
    model.config.use_cache = False

    ### SOURCE https://github.com/artidoro/qlora/blob/main/qlora.py
    # Verify the parameter datatypes before training
    dtypes = {}
    for _, p in model.named_parameters():
        dtype = p.dtype
        if dtype not in dtypes:
            dtypes[dtype] = 0
        dtypes[dtype] += p.numel()
    total = 0
    for k, v in dtypes.items():
        total += v
    for k, v in dtypes.items():
        print(k, v, v / total)

    do_train = True

    # Launch training and log metrics
    print("Training...")
    if do_train:
        train_result = trainer.train()
        metrics = train_result.metrics
        trainer.log_metrics("train", metrics)
        trainer.save_metrics("train", metrics)
        trainer.save_state()
        print(metrics)

    # Save the model (only the LoRA adapter weights are written)
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok=True)
    trainer.model.save_pretrained(output_dir)

    # Free memory for merging weights
    del model
    del trainer
    torch.cuda.empty_cache()
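At inference time I load the saved adapter and generate from the same prompt template, but without the gold label after "### Response:". Roughly like this (a sketch; the loading code and generation settings are illustrative, not my exact script):

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", device_map="auto")
model = PeftModel.from_pretrained(base_model, output_dir)  # output_dir is where fine_tune saved the adapter
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")

prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nWhat is the sentiment of this sentence? Positive\nNegative\n\n"
    "Input:\nThe boy sitting over there is happy.\n\n"
    "### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=5)
# Decode only the newly generated tokens (the predicted label)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))

No matter what the input sentence is, the decoded response comes back as the same label.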
Has anyone else faced this kind of issue?
Please note: I am passing an equal proportion of both classes during training.
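For reference, this is roughly how I checked the class balance (a sketch; raw_dataset and the "label" column are assumptions about my data loading code):

from collections import Counter

# Count the gold labels in the raw (pre-tokenization) training data
print(Counter(example["label"] for example in raw_dataset))
# prints roughly equal counts for 'Positive' and 'Negative'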