So I have fine-tuned phi-2 on a food dataset of about 1,500 rows, and the resulting model is here:
https://huggingface.co/vish26/phi2-cookbook/tree/main
These are my training parameters:
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainerCallback,
                          TrainingArguments,
                          Trainer,
                          IntervalStrategy)
from peft import LoraConfig, prepare_model_for_kbit_training
from trl import SFTTrainer
import torch

model_id = "microsoft/phi-2"
new_model = 'Recipe-Generator'

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'

# BitsAndBytes configuration for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',             # 4-bit NormalFloat (NF4) quantization
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False        # Avoid double quantization for better performance
)
try:
    # Load model with quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=bnb_config,
        flash_attn=True,
        flash_rotary=True,
        low_cpu_mem_usage=True,
        device_map={"": 0},
        revision='refs/pr/23'
    )

    # Set model configuration for training
    model.config.use_cache = False
    model.config.pretraining_tp = 1

    # Prepare model for k-bit training
    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

    # Training arguments
    training_args = TrainingArguments(
        output_dir='./Recipe-Generator',
        num_train_epochs=1,
        per_device_train_batch_size=1,      # Reduce batch size to fit in memory
        gradient_accumulation_steps=64,     # Increase gradient accumulation steps
        eval_strategy=IntervalStrategy.STEPS,
        eval_steps=1500,
        save_total_limit=2,
        optim='paged_adamw_8bit',
        learning_rate=2e-4,
        lr_scheduler_type='cosine',
        save_steps=1500,
        warmup_ratio=0.05,
        weight_decay=0.01,
        fp16=True,                          # Use mixed precision
        max_steps=-1
    )

    # PEFT configuration
    peft_config = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        bias='none',
        task_type='CAUSAL_LM',
        target_modules=['Wqkv', 'fc1', 'fc2']
    )

    # Initialize trainer
    # (training_ds / evaluation_ds are my prepared train/eval datasets; preparation code not shown here)
    trainer = SFTTrainer(
        model=model,
        train_dataset=training_ds,
        eval_dataset=evaluation_ds,
        peft_config=peft_config,
        dataset_text_field='text',
        tokenizer=tokenizer,
        args=training_args,
    )

    torch.cuda.empty_cache()
    trainer.train()
    trainer.save_model('./Recipe-Generator')
    tokenizer.save_pretrained('./Recipe-Generator')
except Exception as e:
    print('At line:', e.__traceback__.tb_lineno)
    print('________________ERROR________________:', e)
After that, I pushed the model to the Hugging Face Hub, roughly as sketched below.
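For reference, the upload step was essentially a push_to_hub call (this is a sketch, not my exact code):

# Sketch of the upload step (approximate; exact calls may have differed)
trainer.model.push_to_hub("vish26/phi2-cookbook")
tokenizer.push_to_hub("vish26/phi2-cookbook")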
My question is: how do I fine-tune a model that is itself already fine-tuned and hosted on the Hugging Face Hub? I want to continue training my model on the Hieu-Pham/kaggle_food_recipes dataset.
This is how I load my fine-tuned model in code:
from trl import SFTTrainer
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          pipeline,
                          BitsAndBytesConfig,
                          GemmaTokenizer,
                          HfArgumentParser,
                          TrainingArguments,
                          IntervalStrategy)
import torch

# Load the tokenizer
model_id = "vish26/phi2-cookbook"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'
# BitsAndBytes configuration for 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',             # 4-bit NormalFloat (NF4) quantization
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False        # Avoid double quantization for better performance
)
def preprocess_function(examples):
    # Concatenate Title, Ingredients, and Instructions into a single string for each example
    inputs = [title + ": " + ingredients + "\nInstructions:\n" + instructions
              for title, ingredients, instructions in zip(examples['Title'], examples['Ingredients'], examples['Instructions'])]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")
    return model_inputs

# Apply the preprocessing function to the dataset
# (filtered_dataset is the Hieu-Pham/kaggle_food_recipes dataset after my filtering; loading code omitted here)
tokenized_datasets = filtered_dataset.map(preprocess_function, batched=True)

# Set the format of the dataset to PyTorch tensors
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask'])

# Specify the training dataset
train_dataset = tokenized_datasets['train']
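For clarity, each preprocessed example is a single string of roughly this shape (illustrative, made-up values, not taken from the actual dataset):

# Illustrative shape of one concatenated example (made-up values)
example_text = "Masala Omelette: eggs, onion, chilli, salt" + "\nInstructions:\n" + "Whisk the eggs, add the rest, and fry."
print(example_text)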
try:
    print(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        trust_remote_code=True,
        quantization_config=bnb_config,
        flash_attn=True,
        flash_rotary=True,
        low_cpu_mem_usage=True,
        device_map={"": 0},
        revision='refs/pr/23'   # the error below points at this revision
    )

    model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)

    # Set model configuration for training
    model.config.use_cache = False
    model.config.pretraining_tp = 1
    # Define training arguments
    training_args = TrainingArguments(
        output_dir='./checkpoint-1',        # Output directory
        overwrite_output_dir=True,          # Overwrite the content of the output directory
        num_train_epochs=1,
        per_device_train_batch_size=1,      # Reduce batch size to fit in memory
        gradient_accumulation_steps=64,     # Increase gradient accumulation steps
        eval_strategy=IntervalStrategy.STEPS,
        eval_steps=1500,
        save_total_limit=2,
        optim='paged_adamw_8bit',
        learning_rate=2e-4,
        lr_scheduler_type='cosine',
        save_steps=1500,
        warmup_ratio=0.05,                  # Warmup fraction for the learning rate scheduler
        weight_decay=0.01,
        fp16=True,                          # Use mixed precision
        max_steps=-1
    )
    # PEFT configuration
    peft_config = LoraConfig(
        r=32,
        lora_alpha=64,
        lora_dropout=0.05,
        bias='none',
        task_type='CAUSAL_LM',
        target_modules=['Wqkv', 'fc1', 'fc2']
    )

    # Define the trainer
    trainer = SFTTrainer(
        model=model,
        train_dataset=train_dataset,
        # eval_dataset=evaluation_ds,
        peft_config=peft_config,
        dataset_text_field='text',
        tokenizer=tokenizer,
        args=training_args,
    )

    # Start training
    torch.cuda.empty_cache()
    trainer.train()
    trainer.save_model('./checkpoint-1')
    tokenizer.save_pretrained('./checkpoint-1')
except Exception as e:
    print(f'AT LINE {e.__traceback__.tb_lineno}: {e.args[0]}')
When I run the above block, I get this error:
AT LINE 45: refs/pr/23 is not a valid git identifier (branch name, tag name or commit id) that exists for this model name. Check the model page at ‘vish26/phi2-cookbook · Hugging Face’ for available revisions.
I suspect the immediate problem is the revision='refs/pr/23' argument, which exists on the base microsoft/phi-2 repo but apparently not on mine. Please help me fine-tune my already fine-tuned model; the two options I'm considering are sketched below.
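Option 1 (a sketch, assuming the only issue is the non-existent revision): load my repo directly and drop the revision argument; I also dropped the flash_attn / flash_rotary flags on the assumption that they belong to that PR branch's custom code:

# Sketch: load the fine-tuned repo directly, without the refs/pr/23 revision
# (assumption: the repo's default revision is the one I want)
model = AutoModelForCausalLM.from_pretrained(
    "vish26/phi2-cookbook",
    trust_remote_code=True,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    device_map={"": 0},
)

Option 2 (also a sketch, and only applicable if my repo actually stores a LoRA adapter rather than merged weights, which I'm not certain about): load the base phi-2 model as in the first script and attach the saved adapter with peft, then continue training:

# Sketch: load base phi-2 and attach my adapter on top
# (assumption: vish26/phi2-cookbook contains PEFT adapter weights)
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(
    "microsoft/phi-2",
    trust_remote_code=True,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    device_map={"": 0},
)
model = PeftModel.from_pretrained(base_model, "vish26/phi2-cookbook", is_trainable=True)

Which of these (if either) is the right way to continue fine-tuning?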