Hi,
I am trying to fine-tune Meta-Llama-3.1-8B-Instruct to improve its function-call predictions. To do that, I created a dataset and followed the steps in the "Fine-Tuning Llama-3.1-8B for Function Calling using LoRA" blog post by Gautam Chutani on Medium. As a result, the model now predicts function names and parameters perfectly, but it also answers ordinary prompts like "how are you?" with spurious function calls such as [get_weather(city="IL")].
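For context, each record in my dataset carries the three fields that formatting_prompts_func below consumes (query, tool, answer). A record looks roughly like this; the get_weather schema and the concrete values here are only illustrative:

{
    "query": "What is the weather in Chicago?",
    "tool": "[{\"name\": \"get_weather\", \"description\": \"Get the current weather for a given city\", \"parameters\": {\"city\": {\"type\": \"string\", \"description\": \"The city name\"}}}]",
    "answer": "[get_weather(city=\"Chicago\")]"
}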
Please find the training code snippets below:
import torch
from unsloth import FastLanguageModel

max_seq_length = 2048  # Unsloth supports RoPE scaling automatically!
dtype = None  # None for auto detection
load_in_4bit = False  # Use 4-bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "meta-llama/Llama-3.1-8B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank - suggested values: 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,  # Supports any value, but 0 is optimized
    bias = "none",  # Supports any value, but "none" is optimized
    use_gradient_checkpointing = "unsloth",  # Ideal for long-context tuning
    random_state = 3407,
    use_rslora = False,  # Disable rank-stabilized LoRA for simpler tasks
    loftq_config = None,  # No LoftQ; standard fine-tuning
)
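# Optional sanity check: with only the LoRA adapters trainable, the trainable
# parameter count should be a small fraction of the total (PEFT helper method).
model.print_trainable_parameters()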
from unsloth.chat_templates import get_chat_template

# Initialize the tokenizer with the chat template and mapping
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3",
    mapping = {"role" : "from", "content" : "value", "user" : "human", "assistant" : "gpt"},  # ShareGPT style
    map_eos_token = True,  # Maps <|im_end|> to <|eot_id|> instead
)
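# Optional sanity check: with map_eos_token = True the EOS token should be
# remapped, so I expect this to print "<|eot_id|>".
print(tokenizer.eos_token)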
def formatting_prompts_func(examples):
    convos = []
    # Iterate through each item in the batch (examples arrive as lists of values)
    for query, tools, answers in zip(examples['query'], examples['tool'], examples['answer']):
        tool_user = {
            "content": f"You are a helpful assistant with access to the following tools or function calls. Your task is to produce a sequence of tools or function calls necessary to generate response to the user utterance. Use the following tools or function calls as required:\n{tools}",
            "role": "system"
        }
        ques_user = {
            "content": f"{query}",
            "role": "user"
        }
        assistant = {
            "content": f"{answers}",
            "role": "assistant"
        }
        convos.append([tool_user, ques_user, assistant])
    texts = [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]
    return {"text": texts}
# Apply the formatting to the dataset
dataset = dataset.map(formatting_prompts_func, batched = True)
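# Optional sanity check: inspect one rendered conversation to confirm the
# Llama-3 special tokens (<|start_header_id|>, <|eot_id|>, etc.) appear as expected.
print(dataset[0]["text"])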
from transformers import TrainingArguments

args = TrainingArguments(
    per_device_train_batch_size = 8,  # Batch size per device
    gradient_accumulation_steps = 2,  # Accumulate gradients (effective batch size 16 per device)
    warmup_steps = 5,
    learning_rate = 2e-4,  # Learning rate for optimization
    num_train_epochs = 2,
    fp16 = not torch.cuda.is_bf16_supported(),
    bf16 = torch.cuda.is_bf16_supported(),
    optim = "adamw_8bit",
    weight_decay = 0.01,  # Regularization term to reduce overfitting
    lr_scheduler_type = "linear",  # Linear learning rate decay
    seed = 3407,
    output_dir = "outputs",
    logging_steps = 1,  # Log metrics every step
    logging_strategy = "steps",
    save_strategy = "no",
    load_best_model_at_end = True,  # No effect here: nothing is checkpointed with save_strategy = "no"
    report_to = "none",
    save_only_model = False  # Save the full training state, not only the model weights
)
from trl import SFTTrainer

trainer = SFTTrainer(
    model = model,
    processing_class = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,  # True can make training 5x faster for short sequences
    args = args
)
from unsloth import unsloth_train
trainer_stats = unsloth_train(trainer)
print(trainer_stats)
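# For reference, this is roughly how I reproduce the issue after training
# (a minimal inference sketch; the exact prompt wording is illustrative):
FastLanguageModel.for_inference(model)  # switch the Unsloth model to inference mode
messages = [{"role": "user", "content": "how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize = True, add_generation_prompt = True, return_tensors = "pt"
).to(model.device)
outputs = model.generate(input_ids = input_ids, max_new_tokens = 64, use_cache = True)
print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens = True))
# -> prints something like: [get_weather(city="IL")]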
What am I missing?
Thank you for your help.