I am implementing a RAG pipeline with a Hugging Face model on LlamaIndex 0.10, and I am facing an error when running the query engine.
import torch

# These are the imports I'm using; HuggingFaceLLM comes from the legacy
# namespace, which matches the llama_index/legacy path in the traceback below
from llama_index.legacy.llms.huggingface import HuggingFaceLLM
from llama_index.legacy.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceEmbeddings  # or langchain.embeddings on older LangChain
from llama_index.core import VectorStoreIndex

# system_prompt and query_wrapper_prompt are defined in an earlier cell
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.5, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map="auto",
    model_kwargs={"torch_dtype": torch.float16},
)
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
Settings.llm = llm
Settings.embed_model = embed_model
Settings.node_parser = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
# documents is loaded in an earlier cell (e.g. via SimpleDirectoryReader)
index = VectorStoreIndex.from_documents(documents, show_progress=True)
query_engine = index.as_query_engine()
########## Error in execution ##########
response = query_engine.query("what is a wide ball")
########################################
ValueError                                Traceback (most recent call last)
in <cell line: 2>()
      1 query_engine = index.as_query_engine()
----> 2 response = query_engine.query("what is a wide ball")
      3 # response

15 frames
/usr/local/lib/python3.10/dist-packages/llama_index/legacy/llms/base.py in wrapper_logic(_self)
    161     callback_manager = getattr(_self, "callback_manager", None)
    162     if not isinstance(callback_manager, CallbackManager):
--> 163         raise ValueError(
    164             "Cannot use llm_completion_callback on an instance "
    165             "without a callback_manager attribute."

ValueError: Cannot use llm_completion_callback on an instance without a callback_manager attribute.
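One thing I notice: the failing frame is in llama_index/legacy/..., while my Settings and node parser come from llama_index.core, so I may be mixing the legacy and core namespaces. For comparison, this is the import layout I understand 0.10 expects with the split integration packages (the package names and import paths below are my assumption from the 0.10 docs):

# Assumed v0.10 layout; each integration is its own pip package:
#   pip install llama-index-llms-huggingface llama-index-embeddings-langchain
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.langchain import LangchainEmbedding
from langchain_community.embeddings import HuggingFaceEmbeddings

I'm not sure whether the mixed imports are actually the cause here, though.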
I'm desperately looking for a solution.