PEFT after bitsandbytes quantization seems to have no effect on the LLM

I have the Python 3 code below. I'm using it to PEFT fine-tune a FLAN-T5 model with LoRA for dialogue summarization. I first reduced the precision with bitsandbytes (4-bit NF4) so that the model fits on my single GPU. When I evaluate the original model with ROUGE against the human baseline summaries and then compare it to each of the PEFT adapter models trained below (roughly as in the evaluation sketch after the code), they all get exactly the same ROUGE scores. Could reducing the precision of the model weights this much with bitsandbytes make PEFT fine-tuning have no effect? Can you see any other reason PEFT would have no effect for the range of rank, epochs, and max_steps I've used below?

code:

from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
import datetime
import logging


# Load the dataset and the LLM


huggingface_dataset_name = "knkarthick/dialogsum"

dataset = load_dataset(huggingface_dataset_name)

print(dataset)  # inspect the splits




# Hugging Face API key, loaded from a local config module
from config import api_key

apikey = api_key


# Load the pretrained model

# Set the quantization configuration to load a large model with less GPU memory;
# this requires the `bitsandbytes` library

from torch import cuda, bfloat16
import transformers

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'


bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

model_id = 'google/flan-t5-base'

hf_auth = apikey
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)





original_model = AutoModelForSeq2SeqLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth,
    cache_dir='/home/username/stuff/username_storage/LLM/weights/huggingface/hub/',
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)





index = 200

dialogue = dataset['test'][index]['dialogue']
summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary:
"""

inputs = tokenizer(prompt, return_tensors='pt')
output = tokenizer.decode(
    original_model.generate(
        inputs["input_ids"].cuda(),
        max_new_tokens=200,
    )[0],
    skip_special_tokens=True
)

dash_line = '-' * 100

print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{summary}')
print(dash_line)
print(f'MODEL GENERATION - ZERO SHOT:\n{output}')


# updated 11/1/23 to ensure using gpu
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True,
                                     return_tensors="pt").input_ids.cuda()
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True,
                                  return_tensors="pt").input_ids.cuda()

    return example

# The dataset contains 3 different splits: train, validation, and test.
# tokenize_function is applied to all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets = tokenized_datasets.remove_columns(['id', 'topic', 'dialogue', 'summary',])




def pipeline_bnb_peft_lora(rank,
                           name,
                           train_epochs,
                           max_steps,
                           original_model,
                           data):

    from peft import LoraConfig, get_peft_model, TaskType

    lora_config = LoraConfig(
        r=rank,  # Rank
        lora_alpha=32,
        target_modules=["q", "v"],
        lora_dropout=0.05,
        bias="none",
        task_type=TaskType.SEQ_2_SEQ_LM  # FLAN-T5
    )


    # Add LoRA adapter layers/parameters to the original LLM to be trained.
    peft_model = get_peft_model(original_model, lora_config)

    # Train the PEFT adapter: define training arguments and create a `Trainer` instance.


    output_dir = f'/home/username/stuff/username_storage/LLM/PEFT/train_args/{name}'

    peft_training_args = TrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=1,
        learning_rate=1e-3,  # Higher learning rate than full fine-tuning.
        num_train_epochs=train_epochs,  # updated 12/19/23 train on higher number of epochs
        max_steps=max_steps,
        fp16=True
    )

    peft_trainer = Trainer(
        model=peft_model,
        args=peft_training_args,
        train_dataset=data,
    )



    peft_trainer.train()

    peft_model_path = "/home/username/stuff/username_storage/LLM/PEFT/" + name

    peft_trainer.model.save_pretrained(peft_model_path)
    tokenizer.save_pretrained(peft_model_path)



# add a timestamp to the log name
ts = datetime.datetime.now().isoformat()

# logging.basicConfig(filename='example.log', level=logging.DEBUG)
logging.basicConfig(filename='/mnt/data/sda/user_storage/username_storage/LLM/error_logs' + ts + '.log',
                    level=logging.DEBUG,
                    format='%(asctime)s %(levelname)s %(name)s %(message)s')

logger = logging.getLogger(__name__)


rank_list=[4,8,16,32]
epoch_list=[1,5,10,20]
max_step_list=[1,5,10,50]


# test rank
for x in rank_list:

    name = 'testrank011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=x,
                               name=name,
                               train_epochs=1,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))



# test epoch
for x in epoch_list:

    name = 'testepoch011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=name,
                               train_epochs=x,
                               max_steps=1,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))


# test max_steps
for x in max_step_list:

    name = 'testmaxsteps011224_' + str(x)

    try:
        pipeline_bnb_peft_lora(rank=4,
                               name=name,
                               train_epochs=1,
                               max_steps=x,
                               original_model=original_model,
                               data=tokenized_datasets["train"])

    except Exception as err:
        logger.error('pipeline_bnb_peft_lora ' + name + ' failed: ' + str(err))

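For reference, the ROUGE comparison against the human baseline isn't part of the script above. It looks roughly like this; the adapter path and the 10-example slice are simplified placeholders for the real loop over every saved adapter:

import evaluate
from peft import PeftModel

rouge = evaluate.load('rouge')

def summarize(model, dialogue):
    prompt = f"Summarize the following conversation.\n\n{dialogue}\n\nSummary: "
    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(model.device)
    output_ids = model.generate(input_ids, max_new_tokens=200)[0]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

dialogues = dataset['test']['dialogue'][:10]       # small slice just for illustration
human_baseline = dataset['test']['summary'][:10]

# Summaries from the original (quantized) base model.
original_summaries = [summarize(original_model, d) for d in dialogues]

# Load one of the saved PEFT adapters on top of the same quantized base model.
peft_model = PeftModel.from_pretrained(
    original_model,
    '/home/username/stuff/username_storage/LLM/PEFT/testrank011224_4')
peft_summaries = [summarize(peft_model, d) for d in dialogues]

print(rouge.compute(predictions=original_summaries, references=human_baseline))
print(rouge.compute(predictions=peft_summaries, references=human_baseline))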