How to resume training from a LoRA checkpoint

from peft import LoraConfig
from transformers import TrainingArguments
from trl import SFTTrainer

# model / tokenizer: a Mistral base model (loading sketched below)
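
For context, a minimal sketch of how the base model and tokenizer might be loaded; the model id and the 4-bit quantization settings are assumptions, not from the original setup:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Assumed base model id; substitute the Mistral checkpoint you actually use.
base_model_id = "mistralai/Mistral-7B-v0.1"

# 4-bit quantization is a common pairing with the paged_adamw_32bit optimizer
# used below (assumption).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token  # Mistral defines no pad token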

checkpoint_path = "model/checkpoint-1000"

lora_r = 16
lora_alpha = 64
lora_dropout = 0.1
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]
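
If you are unsure these module names match your model, one quick check (a sketch, assuming model is already loaded) is to list the Linear layers, which are LoRA's usual targets:

import torch.nn as nn

# Collect the leaf names of all Linear layers (lm_head will also appear,
# but it is normally left out of target_modules).
linear_names = {
    name.split(".")[-1]
    for name, module in model.named_modules()
    if isinstance(module, nn.Linear)
}
print(linear_names)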

peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=lora_target_modules,
    bias="none",
    task_type="CAUSAL_LM",
)
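
Since peft_config is passed to SFTTrainer, resuming through the trainer (shown at the end) should restore both the adapter weights and the optimizer state from the checkpoint. If you would rather attach the saved adapter yourself, a sketch (assuming checkpoint_path contains adapter_config.json and the adapter weights):

from peft import PeftModel

# Load the saved LoRA adapter onto the base model and keep it trainable.
model = PeftModel.from_pretrained(model, checkpoint_path, is_trainable=True)

If you go this route, drop peft_config from the SFTTrainer call so a fresh, untrained adapter is not created on top of the loaded one.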
training_arguments = TrainingArguments(
    per_device_train_batch_size=1,
    gradient_accumulation_steps=32,
    optim="paged_adamw_32bit",
    logging_steps=1,
    learning_rate=1e-4,
    fp16=True,
    max_grad_norm=0.3,
    num_train_epochs=10,
    save_steps=100,
    evaluation_strategy="steps",
    eval_steps=0.2,
    warmup_ratio=0.05,
    save_strategy="steps",
    group_by_length=True,
    output_dir=OUTPUT_DIR,
    report_to="tensorboard",
    save_safetensors=True,
    lr_scheduler_type="cosine",
    seed=42,
)
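
Resuming only works if the checkpoint directory written by save_strategy/save_steps is intact. A quick sanity check; the file names below are what Trainer typically writes for a PEFT run with save_safetensors=True, so treat the exact list as an assumption for your versions:

import os

expected = [
    "adapter_model.safetensors",  # LoRA adapter weights
    "optimizer.pt",               # optimizer state, needed for a true resume
    "scheduler.pt",               # LR scheduler state
    "trainer_state.json",         # global step and logging history
]
for name in expected:
    path = os.path.join(checkpoint_path, name)
    print(name, "OK" if os.path.exists(path) else "MISSING")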

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset["train"],
    eval_dataset=val_dataset["train"],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)
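
With the trainer constructed, resuming is a matter of pointing train() at the saved checkpoint; both forms below are standard Trainer behavior:

# Resume from the specific checkpoint: restores the adapter weights,
# optimizer, scheduler, and global step, so training continues from step 1000.
trainer.train(resume_from_checkpoint=checkpoint_path)

# Or let the Trainer pick the latest checkpoint under output_dir:
# trainer.train(resume_from_checkpoint=True)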