Hi team,
I’m using the Hugging Face framework to fine-tune LLMs; currently I’m working with a Mistral model. I want to save the fine-tuned model, load it back later, and run inference with it.
Since I’m new to the Hugging Face framework, I’d appreciate your guidance on saving, loading, and inference.
I remember that in plain PyTorch we wrap inference in the with torch.no_grad(): context manager, but I’m not seeing anything like that in the Hugging Face examples.
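For context, this is the plain-PyTorch pattern I have in mind (just a generic sketch with placeholder names, not my actual model):

model.eval()                 # switch off dropout and other training-only behaviour
with torch.no_grad():        # disable gradient tracking for inference
    outputs = model(inputs)  # plain forward pass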
@nielsr Could you please guide me here? For reference, this is my current fine-tuning script:
import pandas as pd
import torch
from datasets import Dataset, load_dataset
from random import randrange
from peft import LoraConfig, get_peft_model, AutoPeftModelForCausalLM, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer
import warnings
warnings.filterwarnings("ignore")
df = pd.read_csv("train.csv")
train = Dataset.from_pandas(df)
model_id = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    torch_dtype=torch.float16,
    device_map="auto",
)
model.resize_token_embeddings(len(tokenizer))
model = prepare_model_for_kbit_training(model)  # model is loaded in 4-bit, not 8-bit
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, peft_config)
args = TrainingArguments(
    output_dir="custom_domain",
    num_train_epochs=2,               # adjust based on the data size
    per_device_train_batch_size=8,    # lower this if you run out of GPU RAM
    optim="adamw_torch",
    logging_steps=100,
    save_total_limit=2,
    save_strategy="no",
    load_best_model_at_end=False,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    evaluation_strategy="no",         # switch to "epoch" once an eval_dataset is passed
    seed=42,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    report_to="none",
    torch_compile=True,               # this is the flag that fails for Mistral (see below)
    # dataloader_num_workers=4,
)
# Create the trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=train,
    # eval_dataset=test,
    dataset_text_field="text",
    peft_config=peft_config,
    max_seq_length=512,
    tokenizer=tokenizer,
    args=args,
    packing=False,
)
trainer.train()
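For what it’s worth, this is roughly what I have pieced together from the docs for the save / load / inference part. It is only a sketch (the output path and prompt are placeholders), so please correct me if this is not the recommended way:

# 1. Save the LoRA adapter and tokenizer after training (path is a placeholder)
trainer.save_model("custom_domain/final_adapter")
tokenizer.save_pretrained("custom_domain/final_adapter")

# 2. Later, load the adapter together with the base model in one call
model = AutoPeftModelForCausalLM.from_pretrained(
    "custom_domain/final_adapter",
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("custom_domain/final_adapter")
model.eval()

# 3. Inference -- as far as I can tell, generate() already disables gradients,
#    but wrapping it in torch.no_grad() like in plain PyTorch shouldn't hurt
prompt = "..."  # placeholder prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Is this the right approach, or should I merge the adapter into the base model (e.g. with merge_and_unload()) before saving?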
I couldn’t find a complete end-to-end example of this in the Hugging Face documentation, so a pointer to a relevant example would be much appreciated.
Additionally, I noticed that the Mistral model fails to compile (torch_compile=True in the TrainingArguments above) during fine-tuning with the Hugging Face Trainer.