My generate() response is wrong

Hello, I’m new to coding and I’ve been learning to work with LLMs for some time now. I’m trying to build a chatbot in French using the vigostral-7b-chat model, which is a fine-tuned model based on Mistral-7B. I want to fine-tune it on my own dataset, which consists of data about my faculty that I scraped from their website (public data), such as the number of students in the faculty, etc. I followed a tutorial to fine-tune the model and uploaded the result to Hugging Face without any errors. However, when I run inference with my model, it responds with a random sequence of letters. I think there might be an issue with my dataset, which is structured as follows: LINE: text…
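To make that structure concrete, here is a minimal sketch of how I understand my own files: each JSON record has a single `line` field containing raw French text (the field name matches the `dataset_text_field="line"` I pass to `SFTTrainer` below; the records shown here are purely illustrative, not my real data):

```python
from datasets import Dataset

# Purely illustrative records (not my real data): each entry is a JSON object
# with a single "line" field of raw French text, which is the field that
# SFTTrainer reads via dataset_text_field="line".
example_records = [
    {"line": "La faculté compte environ 45 000 étudiants."},
    {"line": "Le campus principal est situé à Villeurbanne."},
]

toy_dataset = Dataset.from_list(example_records)
print(toy_dataset[0]["line"])
```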

Does anyone have any idea where the problem might be coming from? Thank you very much.

```python
from huggingface_hub import interpreter_login
interpreter_login()

import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
base_model="bofenghuang/vigostral-7b-chat"
new_model="ALIE_0.5"
data = load_dataset('json',data_files={'train': ['C:/Users/sacha/Documents/projet ALI/test vs/ALIE0.2/Dataset/lyon1_charlie_dataset_train.json'],
                                        'test': ['C:/Users/sacha/Documents/projet ALI/test vs/ALIE0.2/Dataset/lyon1_delta_dataset_validation.json']})

print(data['train'])
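
# QLoRA setup: 4-bit NF4 quantization with bfloat16 compute for the base model.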
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,  # to revisit
)

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map={"": 0},
)


model.config.use_cache = False # silence the warnings. Please re-enable for inference!
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True

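# Prepare the quantized model for k-bit training and attach LoRA adapters
# on the attention projections (q/k/v/o) and gate_proj.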
model = prepare_model_for_kbit_training(model)
peft_config = LoraConfig(
        r=16,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj","gate_proj"]
    )
model = get_peft_model(model, peft_config)

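# Training hyperparameters: paged 8-bit AdamW, constant learning rate, 1 epoch.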
training_arguments = TrainingArguments(
    output_dir= "C:/Users/sacha/Documents/projet ALI/test vs/ALIE0.2/end",
    num_train_epochs= 1,
    per_device_train_batch_size= 2,
    gradient_accumulation_steps= 4,
    optim = "paged_adamw_8bit",
    save_steps= 5000,
    logging_steps= 30,
    learning_rate= 2e-4,
    weight_decay= 0.001,
    fp16= False,
    bf16= False,
    max_grad_norm= 0.3,
    max_steps= -1,
    warmup_ratio= 0.3,
    group_by_length= True,
    lr_scheduler_type="constant",
)

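# Supervised fine-tuning on the raw text in the "line" field, without packing.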
trainer = SFTTrainer(
    model=model,
    train_dataset=data['train'],
    eval_dataset=data['test'],
    peft_config=peft_config,
    max_seq_length= None,
    tokenizer=tokenizer,
    dataset_text_field="line",
    args=training_arguments,
    packing=False,
)

trainer.train()
# Save the fine-tuned model
trainer.model.save_pretrained(new_model)
model.config.use_cache = True
model.eval()
model.push_to_hub(new_model,use_temp_dir=False)

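# Reload the base model in bfloat16, merge the LoRA adapter into it,
# and push the merged model and tokenizer to the Hub.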
base_model="bofenghuang/vigostral-7b-chat"
new_model="AscheZ/ALIE_0.5"

base_model_reload = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0},
)
model = PeftModel.from_pretrained(base_model_reload, new_model)
model = model.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)
```


My code for inference:

```python
model_name = "bofenghuang/vigostral-7b-chat"
adapter_name = "AscheZ/ALIE_0.5"
print(f"Starting to load the model {model_name} into memory")

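# Load the base model in 4-bit, apply the fine-tuned adapter from the Hub, then merge.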
m = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map={"": 0}
)
m = PeftModel.from_pretrained(m, adapter_name)
m = m.merge_and_unload()
tok = AutoTokenizer.from_pretrained(model_name)
tok.bos_token_id = 1

stop_token_ids = [0]

print(f"Successfully loaded the model {model_name} into memory")

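# Tokenize the prompt and sample up to 100 new tokens.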
prompt = "Combien il y'a d'étudiant à lyon 1? "
inputs = tok(prompt, return_tensors="pt").to('cuda')

outputs = m.generate(**inputs, do_sample=True, num_beams=1, max_new_tokens=100)
print(tok.batch_decode(outputs, skip_special_tokens=True))
```

An example of my dataset (screenshot):

Screenshot of the inference output:

Thank you for your help :slight_smile: