TypeError: InferenceClient.text_generation() got an unexpected keyword argument 'token'

##########################Code#####################################
import os

from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader

# Step 1: Setup LLM (Mistral with HuggingFace)

HF_TOKEN = os.environ.get("HF_TOKEN")
HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"

def load_llm(huggingface_repo_id):
    llm = HuggingFaceEndpoint(
        repo_id=huggingface_repo_id,
        temperature=0.5,
        model_kwargs={"token": HF_TOKEN,
                      "max_length": "512"}
    )
    return llm

# Step 2: Connect LLM with FAISS and Create chain

CUSTOM_PROMPT_TEMPLATE = """
Use the pieces of information provided in the context to answer user's question.
If you dont know the answer, just say that you dont know, dont try to make up an answer.
Dont provide anything out of the given context

Context: {context}
Question: {question}

Start the answer directly. No small talk please.
"""

def set_custom_prompt(custom_prompt_template):
    prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
    return prompt

# Load Database

DB_FAISS_PATH = "vectorstore/db_faiss"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)

# Create QA chain

qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm(HUGGINGFACE_REPO_ID),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={'k': 3}),
    return_source_documents=True,
    chain_type_kwargs={'prompt': set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
)

# Now invoke with a single query

user_query = input("Write Query Here: ")
response = qa_chain.invoke({'query': user_query})
print("RESULT: ", response["result"])
print("SOURCE DOCUMENTS: ", response["source_documents"])
###############################Code#######################################

Traceback (most recent call last):
File "c:\Users\manto\Documents\medibot_rag\connect_with_memory_llm.py", line 56, in <module>
response=qa_chain.invoke({'query': user_query})
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\retrieval_qa\base.py", line 154, in _call
answer = self.combine_documents_chain.run(
input_documents=docs, question=question, callbacks=_run_manager.get_child()
)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 608, in run
return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 386, in __call__
return self.invoke(
~~~~~~~~~~~^
inputs,
^^^^^^^
...<2 lines>...
include_run_info=include_run_info,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\combine_documents\base.py", line 138, in _call
output, extra_return_dict = self.combine_docs(
~~~~~~~~~~~~~~~~~^
docs, callbacks=_run_manager.get_child(), **other_keys
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\combine_documents\stuff.py", line 259, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 319, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 386, in __call__
return self.invoke(
~~~~~~~~~~~^
inputs,
^^^^^^^
...<2 lines>...
include_run_info=include_run_info,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 127, in _call
response = self.generate([inputs], run_manager=run_manager)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 139, in generate
return self.llm.generate_prompt(
~~~~~~~~~~~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<2 lines>...
**self.llm_kwargs,
^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 766, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 973, in generate
return self._generate_helper(
~~~~~~~~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<3 lines>...
**kwargs,
^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 792, in _generate_helper
self._generate(
~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<3 lines>...
**kwargs,
^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 1547, in _generate
self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_huggingface\llms\huggingface_endpoint.py", line 312, in _call
response_text = self.client.text_generation(
prompt=prompt,
model=self.model,
**invocation_params,
)
TypeError: InferenceClient.text_generation() got an unexpected keyword argument 'token'

Can you please help me resolve this issue?


import os

from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader

# Step 1: Setup LLM (Mistral with HuggingFace)

HF_TOKEN = os.environ.get("HF_TOKEN")
HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"

def load_llm(huggingface_repo_id):
    # DO NOT include "token" in model_kwargs.
    # Pass the token via environment variable or as a supported class parameter.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
    llm = HuggingFaceEndpoint(
        repo_id=huggingface_repo_id,
        temperature=0.5,
        model_kwargs={"max_length": 512}
        # Optionally, for some langchain versions:
        # huggingfacehub_api_token=HF_TOKEN
    )
    return llm

# Step 2: Connect LLM with FAISS and Create chain

CUSTOM_PROMPT_TEMPLATE = """
Use the pieces of information provided in the context to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.
Don't provide anything out of the given context.

Context: {context}
Question: {question}

Start the answer directly. No small talk please.
"""

def set_custom_prompt(custom_prompt_template):
    prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
    return prompt

# Load Database

DB_FAISS_PATH = "vectorstore/db_faiss"
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)

# Create QA chain

qa_chain = RetrievalQA.from_chain_type(
    llm=load_llm(HUGGINGFACE_REPO_ID),
    chain_type="stuff",
    retriever=db.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
)

# Now invoke with a single query

user_query = input("Write Query Here: ")
response = qa_chain.invoke({'query': user_query})
print("RESULT: ", response["result"])
print("SOURCE DOCUMENTS: ", response["source_documents"])

Solution provided by Triskel Data Deterministic AI.


Thank you very much for your prompt response. I did not expect to get an answer so quickly. Thank you very much. But now I get another issue, this time with "max_length". Can you please help me resolve it?
Traceback (most recent call last):
File "c:\Users\manto\Documents\medibot_rag\connect_with_memory_llm.py", line 72, in <module>
response=qa_chain.invoke({'query': user_query})
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\retrieval_qa\base.py", line 154, in _call
answer = self.combine_documents_chain.run(
input_documents=docs, question=question, callbacks=_run_manager.get_child()
)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 608, in run
return self(kwargs, callbacks=callbacks, tags=tags, metadata=metadata)[
~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 386, in __call__
return self.invoke(
~~~~~~~~~~~^
inputs,
^^^^^^^
...<2 lines>...
include_run_info=include_run_info,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\combine_documents\base.py", line 138, in _call
output, extra_return_dict = self.combine_docs(
~~~~~~~~~~~~~~~~~^
docs, callbacks=_run_manager.get_child(), **other_keys
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\combine_documents\stuff.py", line 259, in combine_docs
return self.llm_chain.predict(callbacks=callbacks, **inputs), {}
~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 319, in predict
return self(kwargs, callbacks=callbacks)[self.output_key]
~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\_api\deprecation.py", line 191, in warning_emitting_wrapper
return wrapped(*args, **kwargs)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 386, in __call__
return self.invoke(
~~~~~~~~~~~^
inputs,
^^^^^^^
...<2 lines>...
include_run_info=include_run_info,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 167, in invoke
raise e
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\base.py", line 157, in invoke
self._call(inputs, run_manager=run_manager)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 127, in _call
response = self.generate([inputs], run_manager=run_manager)
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain\chains\llm.py", line 139, in generate
return self.llm.generate_prompt(
~~~~~~~~~~~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<2 lines>...
**self.llm_kwargs,
^^^^^^^^^^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 766, in generate_prompt
return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 973, in generate
return self._generate_helper(
~~~~~~~~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<3 lines>...
**kwargs,
^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 792, in _generate_helper
self._generate(
~~~~~~~~~~~~~~^
prompts,
^^^^^^^^
...<3 lines>...
**kwargs,
^^^^^^^^^
)
^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_core\language_models\llms.py", line 1547, in _generate
self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\manto\.virtualenvs\medibot_rag-BvpnU3Fv\Lib\site-packages\langchain_huggingface\llms\huggingface_endpoint.py", line 312, in _call
response_text = self.client.text_generation(
prompt=prompt,
model=self.model,
**invocation_params,
)
TypeError: InferenceClient.text_generation() got an unexpected keyword argument 'max_length'


Remove "max_length" from model_kwargs and keep only arguments that InferenceClient.text_generation() actually accepts. Output length is controlled with max_new_tokens, which HuggingFaceEndpoint takes as a top-level parameter; see the sketch below.
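
For example, a minimal sketch of load_llm without max_length, assuming a recent langchain-huggingface release where max_new_tokens and huggingfacehub_api_token are accepted as top-level parameters:

def load_llm(huggingface_repo_id):
    # max_new_tokens replaces the unsupported "max_length" key,
    # and the token is a class parameter instead of a model_kwargs entry
    return HuggingFaceEndpoint(
        repo_id=huggingface_repo_id,
        temperature=0.5,
        max_new_tokens=512,
        huggingfacehub_api_token=HF_TOKEN,
    )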


You are sweet. Now my chatbot gives me a reply. I will check and test more, and then report back. Thank you, thank you, and thank you!!!

Yeah, max_new_tokens is the better option if you need to control the response length.
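
For reference, a minimal sketch (hypothetical prompt, assumes HF_TOKEN is set in the environment) of where these arguments land when huggingface_hub is used directly: the token belongs to the InferenceClient constructor, and max_new_tokens goes to text_generation():

import os
from huggingface_hub import InferenceClient

# The token is a constructor argument; generation settings go to text_generation()
client = InferenceClient(
    model="mistralai/Mistral-7B-Instruct-v0.3",
    token=os.environ.get("HF_TOKEN"),
)
print(client.text_generation("What is retrieval-augmented generation?", max_new_tokens=128))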