So I have fine-tuned phi-2 on a food dataset of about 1,500 rows, and the resulting model is here:
https://huggingface.co/vish26/phi2-cookbook/tree/main
These are my training parameters:
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainerCallback,
                          TrainingArguments,
                          Trainer,
                          IntervalStrategy)
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
import torch

model_id = "microsoft/phi-2"
new_model = 'Recipe-Generator'

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# BitsAndBytes configuration for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',             # 4-bit NormalFloat (NF4) quantization
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False        # Avoid double quantization for better performance
)
try:
    # Load model with quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=bnb_config,
        flash_attn=True,
        flash_rotary=True,
        low_cpu_mem_usage=True,
        device_map={"": 0},
        revision='refs/pr/23'
    )

    # Set model configuration for training
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # Prepare model for k-bit training
    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

    # Training arguments
    training_args = TrainingArguments(
        output_dir='./Recipe-Generator',
        num_train_epochs=1,
        per_device_train_batch_size=1,      # Reduce batch size to fit in memory
        gradient_accumulation_steps=64,     # Increase gradient accumulation steps
        eval_strategy=IntervalStrategy.STEPS,
        eval_steps=1500,
        save_total_limit=2,
        optim='paged_adamw_8bit',
        learning_rate=2e-4,
        lr_scheduler_type='cosine',
        save_steps=1500,
        warmup_ratio=0.05,
        weight_decay=0.01,
        fp16=True,                          # Use mixed precision
        max_steps=-1
    )

    # PEFT configuration
    peft_config = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        bias='none',
        task_type='CAUSAL_LM',
        target_modules=['Wqkv', 'fc1', 'fc2']
    )

    # Initialize trainer
    # (training_ds / evaluation_ds are my prepared train/eval datasets; preparation code not shown here)
    trainer = SFTTrainer(
        model=model,
        train_dataset=training_ds,
        eval_dataset=evaluation_ds,
        peft_config=peft_config,
        dataset_text_field='text',
        tokenizer=tokenizer,
        args=training_args,
    )

    torch.cuda.empty_cache()
    trainer.train()
    trainer.save_model('./Recipe-Generator')
    tokenizer.save_pretrained('./Recipe-Generator')
except Exception as e:
    print('At line:', e.__traceback__.tb_lineno)
    print('________________ERROR________________:', e)
After that, I pushed the model to the Hugging Face Hub, roughly as sketched below.
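For reference, the upload step was essentially a push_to_hub call (this is a sketch, not my exact code):

# Sketch of the upload step (approximate; exact calls may have differed)
trainer.model.push_to_hub("vish26/phi2-cookbook")
tokenizer.push_to_hub("vish26/phi2-cookbook")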
My question is: how do I fine-tune a model that is itself already fine-tuned and hosted on the Hugging Face Hub? I want to continue training my model on the Hieu-Pham/kaggle_food_recipes dataset.
This is how I load my fine-tuned model in code:
from trl import SFTTrainer
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          pipeline,
                          BitsAndBytesConfig,
                          GemmaTokenizer,
                          HfArgumentParser,
                          TrainingArguments,
                          IntervalStrategy)
import torch

# Load the tokenizer
model_id = "vish26/phi2-cookbook"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
# BitsAndBytes configuration for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',             # 4-bit NormalFloat (NF4) quantization
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False        # Avoid double quantization for better performance
)
def preprocess_function(examples):
    # Concatenate Title, Ingredients, and Instructions into a single string for each example
    inputs = [title + ": " + ingredients + "\nInstructions:\n" + instructions
              for title, ingredients, instructions in zip(examples['Title'], examples['Ingredients'], examples['Instructions'])]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    return model_inputs

# Apply the preprocessing function to the dataset
# (filtered_dataset is the Hieu-Pham/kaggle_food_recipes dataset after my filtering; loading code omitted here)
tokenized_datasets = filtered_dataset.map(preprocess_function, batched=True)

# Set the format of the dataset to PyTorch tensors
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask'])

# Specify the training dataset
train_dataset = tokenized_datasets['train']
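For clarity, each preprocessed example is a single string of roughly this shape (illustrative, made-up values, not taken from the actual dataset):

# Illustrative shape of one concatenated example (made-up values)
example_text = "Masala Omelette: eggs, onion, chilli, salt" + "\nInstructions:\n" + "Whisk the eggs, add the rest, and fry."
print(example_text)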
try:
    print(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=bnb_config,
        flash_attn=True,
        flash_rotary=True,
        low_cpu_mem_usage=True,
        device_map={"": 0},
        revision='refs/pr/23'   # the error below points at this revision
    )

    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

    # Set model configuration for training
    model.config.use_cache = False
    model.config.pretraining_tp = 1
    # Define training arguments
    training_args = TrainingArguments(
        output_dir='./checkpoint-1',        # Output directory
        overwrite_output_dir=True,          # Overwrite the content of the output directory
        num_train_epochs=1,
        per_device_train_batch_size=1,      # Reduce batch size to fit in memory
        gradient_accumulation_steps=64,     # Increase gradient accumulation steps
        eval_strategy=IntervalStrategy.STEPS,
        eval_steps=1500,
        save_total_limit=2,
        optim='paged_adamw_8bit',
        learning_rate=2e-4,
        lr_scheduler_type='cosine',
        save_steps=1500,
        warmup_ratio=0.05,                  # Warmup fraction for the learning rate scheduler
        weight_decay=0.01,
        fp16=True,                          # Use mixed precision
        max_steps=-1
    )
    # PEFT configuration
    peft_config = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        bias='none',
        task_type='CAUSAL_LM',
        target_modules=['Wqkv', 'fc1', 'fc2']
    )

    # Define the trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        # eval_dataset=evaluation_ds,
        peft_config=peft_config,
        dataset_text_field='text',
        tokenizer=tokenizer,
        args=training_args,
    )

    # Start training
    torch.cuda.empty_cache()
    trainer.train()
    trainer.save_model('./checkpoint-1')
    tokenizer.save_pretrained('./checkpoint-1')
except Exception as e:
    print(f'AT LINE {e.__traceback__.tb_lineno}: {e.args[0]}')
When I run the above block, I get this error:
AT LINE 45: refs/pr/23 is not a valid git identifier (branch name, tag name or commit id) that exists for this model name. Check the model page at ‘vish26/phi2-cookbook · Hugging Face’ for available revisions.
I suspect the immediate problem is the revision='refs/pr/23' argument, which exists on the base microsoft/phi-2 repo but apparently not on mine. Please help me fine-tune my already fine-tuned model; the two options I'm considering are sketched below.
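Option 1 (a sketch, assuming the only issue is the non-existent revision): load my repo directly and drop the revision argument; I also dropped the flash_attn / flash_rotary flags on the assumption that they belong to that PR branch's custom code:

# Sketch: load the fine-tuned repo directly, without the refs/pr/23 revision
# (assumption: the repo's default revision is the one I want)
model = AutoModelForCausalLM.from_pretrained(
    "vish26/phi2-cookbook",
    trust_remote_code=True,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    device_map={"": 0},
)

Option 2 (also a sketch, and only applicable if my repo actually stores a LoRA adapter rather than merged weights, which I'm not certain about): load the base phi-2 model as in the first script and attach the saved adapter with peft, then continue training:

# Sketch: load base phi-2 and attach my adapter on top
# (assumption: vish26/phi2-cookbook contains PEFT adapter weights)
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    device_map={"": 0},
)
model = PeftModel.from_pretrained(base_model, "vish26/phi2-cookbook", is_trainable=True)

Which of these (if either) is the right way to continue fine-tuning?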