Hello, I am having some problems on generating answers based on the csv that I got. I started by creating a vector database to store the embeddings of the csv data, in order to use it in the chatbot. This is in a file called embeddings.py.
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders.csv_loader import CSVLoader
DB_FAISS_PATH = "vectorstore/db_faiss"
loader = CSVLoader(file_path="./data/cleanTripLisbon.csv", encoding="utf-8", csv_args={'delimiter': ','})
data = loader.load()
text_splitter = CharacterTextSplitter(separator='\n')
text_chunks = text_splitter.split_documents(data)
embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
docsearch = FAISS.from_documents(text_chunks, embeddings)
docsearch.save_local(DB_FAISS_PATH)
I have tried multiple ways to put my chatbot to give answers based on the content inside of the csv and unfortunately, never worked. The csv is about attractions to visit and columns such as name, about, address, ratings… And below is the chatbot, that is in the file chatbot.py
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders.csv_loader import CSVLoader
from util import local_settings
# from langchain.llms import OpenAI
from openai import OpenAI
from embeddings import embeddings
vectorstore = FAISS.load_local("vectorstore/db_faiss", embeddings)
# [i] #
# [i] OpenAI API #
# [i] #
class GPT_Helper:
def __init__(self,
OPENAI_API_KEY: str,
system_behavior: str="",
model="gpt-3.5-turbo",
):
self.client = OpenAI(api_key=OPENAI_API_KEY)
self.messages = []
self.model = model
if system_behavior:
self.messages.append({
"role": "system",
"content": system_behavior
})
# [i] get completion from the model
def get_completion(self, prompt, temperature=0):
self.messages = [] # Clear messages list before each interaction
self.messages.append({"role": "user", "content": prompt})
completion = self.client.chat.completions.create(
model=self.model,
messages=self.messages,
temperature=temperature,
)
self.messages.append(
{
"role": "assistant",
"content": completion.choices[0].message.content
}
)
return completion.choices[0].message.content
# [i] #
# [i] AttractionBot #
# [i] #
class AttractionBot:
"""
Generate a response by using LLMs.
"""
def __init__(self, system_behavior: str):
self._system_behavior = system_behavior
self._username = None # Add a private attribute to store the username
self.engine = GPT_Helper(
OPENAI_API_KEY=local_settings.OPENAI_API_KEY,
system_behavior=system_behavior
)
def set_username(self, username):
self._username = username
def generate_response(self, message: str):
# Include the username in the message if available
user_message = f"{self._username}: {message}" if self._username else message
query_embeddings = embeddings.embed_query(user_message)
matching_documents = vectorstore.similarity_search_by_vector(query_embeddings)
retrieved_information = self.retrieve_information(matching_documents)
response = self.engine.get_completion(retrieved_information)
return response
@staticmethod
def retrieve_information(matching_documents):
# Extract the relevant information from the matching documents
information = []
for doc in matching_documents:
# Assuming that the CSV file has columns "attraction_name" and "description"
attraction_name = doc.page_content
information.append(attraction_name)
return information
def __str__(self):
shift = " "
class_name = str(type(self)).split('.')[-1].replace("'>", "")
return f"🤖 {class_name}."
def reset(self):
...
@property
def memory(self):
return self.engine.messages
@property
def system_behavior(self):
return self._system_behavior
@system_behavior.setter
def system_behavior(self, system_config: str):
self._system_behavior = system_config
Could someone helping me understand where is the problem? I am new on working with Langchain. Thanks in advance.