Hi guys!
I've been working with the Mistral 7B model to chat with my own data.
I'm working with a MongoDB dataset about restaurants, but when I ask the model anything related to this dataset, it returns a wrong output.
I've been checking the context, and that seems to be where the main problem is.
I'm posting the code below; I hope someone can help me. I appreciate it, thank you all.
!pip install -q -U torch datasets transformers==4.36.1 tensorflow langchain playwright html2text sentence_transformers faiss-cpu
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 trl==0.4.7
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_transformers import Html2TextTransformer
#from langchain.document_loaders import AsyncChromiumLoader
from langchain_community.document_loaders.mongodb import MongodbLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS, Chroma
from langchain.prompts import PromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from huggingface_hub import login
login("xxxxxxxxxxxxxxxxxxxx")
#################################################################
# Tokenizer
#################################################################
model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
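(Side note, not the bug itself: Mistral-7B-Instruct-v0.2 ships a chat template, so the tokenizer can build the exact [INST] ... [/INST] prompt shape the model was trained on. A quick sketch just to illustrate the expected format:)

# Sketch: show the prompt format the instruct model expects
messages = [{"role": "user", "content": "Hello"}]
print(tokenizer.apply_chat_template(messages, tokenize=False))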
#################################################################
# bitsandbytes parameters
#################################################################
# Activate 4-bit precision base model loading
use_4bit = True
# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"
# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"
# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False
#################################################################
# Set up quantization config
#################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)
#################################################################
# Load pre-trained model
#################################################################
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
)
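(A small sanity check I'd add here, just to confirm the 4-bit quantization actually took effect; get_memory_footprint is a standard transformers method:)

# Sketch: a 7B model loaded in 4-bit should land around ~4 GB
print(f"memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")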
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"
print(print_number_of_trainable_model_parameters(model))
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=False,  # greedy decoding; the original temperature=0 is ignored when sampling is off and only triggers a warning
    repetition_penalty=1.1,
    return_full_text=True,  # note: the prompt is echoed back in the generated output
    max_new_tokens=1000,
)
mistral_llm = HuggingFacePipeline(pipeline=text_generation_pipeline)
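(Before wiring in retrieval, I'd check that the bare pipeline answers at all, so model problems can be separated from context problems. A minimal sketch; remember the prompt is echoed back because of return_full_text=True:)

# Sketch: sanity-check the LLM wrapper without any retrieval
print(mistral_llm.invoke("[INST] Reply with one short sentence: what is MongoDB? [/INST]"))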
!playwright install
!playwright install-deps
import nest_asyncio
nest_asyncio.apply()
#!pip install motor
# Documents to index (restaurant records from MongoDB)
loader = MongodbLoader(
    connection_string="mongodb+srv://victoriglesias5:******@prueba1.hdlxqaf.mongodb.net/",
    db_name="sample_restaurants",
    collection_name="restaurants",
    filter_criteria={"borough": "Bronx", "cuisine": "Bakery"},
)
docs = loader.load()
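(At this point I'd verify what MongodbLoader actually returned, since everything downstream depends on it. A quick sketch:)

# Sketch: how many records matched the filter, and what one looks like
print(len(docs))
print(docs[0].page_content[:300])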
# Chunk text
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs)
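(This chunking step is where I'd look first for your context problem: with chunk_size=100, a single restaurant record almost certainly gets split across chunks, so the chunk containing restaurant_id may not contain the name. A sketch with a larger chunk size; the numbers are guesses, the point is to keep one record per chunk:)

# Sketch: bigger chunks so one restaurant record stays together (sizes are guesses)
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunked_documents = text_splitter.split_documents(docs)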
#!pip install chromadb
# Load chunked documents into the FAISS index
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)
retriever = db.as_retriever(
    # search_type="similarity",
    # search_kwargs={'k': 20}
)
query = "Which restaurant has the id '30075445'?"
docs = db.similarity_search(query)
print(docs[0].page_content)
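(Since you suspect the context, I'd print every retrieved chunk with its distance instead of only the first hit. The FAISS vectorstore exposes similarity_search_with_score; with the default L2 index, lower score means a closer match:)

# Sketch: inspect all retrieved chunks and their distances
for doc, score in db.similarity_search_with_score(query, k=4):
    print(f"score={score:.4f} | {doc.page_content}")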
prompt_template = """
[INST] Instruction: Answer the following question according to your knowledge about the dataset. Here is some context to help you:

{context}

QUESTION:
{question} [/INST]
"""
# Create prompt from prompt template
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
# Create llm chain
llm_chain = LLMChain(llm=mistral_llm, prompt=prompt)
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | llm_chain
)
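(One more thing I'd try here: the retriever returns a list of Document objects, so {context} gets filled with their raw repr, metadata and all, which can easily confuse the model. A sketch that joins just the page_content before it reaches the prompt; nothing else in the chain changes:)

# Sketch: turn retrieved Documents into plain text for the prompt
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | llm_chain
)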
result = rag_chain.invoke("According to the database, what is the 'name' of the restaurant with 'restaurant_id' = '30075445'?")
result['context']
resultado = result['text']
print(resultado)
I believe the error is in the context. Please help!