TypeError: text_generation() got an unexpected keyword argument 'token'

OctopusMode · August 5, 2025, 9:37pm

I’m trying to create a Streamlit chatbot using Mistral-7B-Instruct-v0.3, and after entering my input in the chat, I received such error.
Here’s the code:
#########################################################################
import os
from langchain_huggingface import HuggingFaceEndpoint
import streamlit as st
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

model_id=“mistralai/Mistral-7B-Instruct-v0.3”

def get_llm_hf_inference(model_id=model_id, max_new_tokens=128, temperature=0.1):
“”"
Returns a language model for HuggingFace inference.
Parameters:
- model_id (str): The ID of the HuggingFace model repository.
- max_new_tokens (int): The maximum number of new tokens to generate.
- temperature (float): The temperature for sampling from the model.
Returns:
- llm (HuggingFaceEndpoint): The language model for HuggingFace inference.
“”"
llm = HuggingFaceEndpoint(
repo_id=model_id,
max_new_tokens=max_new_tokens,
temperature=temperature,
token = os.getenv(“HF_TOKEN”)
)
return llm

Configure the Streamlit app

st.set_page_config(page_title=“HuggingFace ChatBot”, page_icon=“”)
st.title(“Personal HuggingFace ChatBot”)
st.markdown(f"This is a simple chatbot that uses the HuggingFace transformers library to generate responses to your text input. It uses the {model_id}.")

Initialize session state for avatars

if “avatars” not in st.session_state:
st.session_state.avatars = {‘user’: None, ‘assistant’: None}

Initialize session state for user text input

if ‘user_text’ not in st.session_state:
st.session_state.user_text = None

Initialize session state for model parameters

if “max_response_length” not in st.session_state:
st.session_state.max_response_length = 256

if “system_message” not in st.session_state:
st.session_state.system_message = “friendly AI conversing with a human user”

if “starter_message” not in st.session_state:
st.session_state.starter_message = “Hello, there! How can I help you today?”

Sidebar for settings

with st.sidebar:
st.header(“System Settings”)

# AI Settings
st.session_state.system_message = st.text_area(
    "System Message", value="You are a friendly AI conversing with a human user."
)
st.session_state.starter_message = st.text_area(
    'First AI Message', value="Hello, there! How can I help you today?"
)

# Model Settings
st.session_state.max_response_length = st.number_input(
    "Max Response Length", value=128
)

# Avatar Selection
st.markdown("*Select Avatars:*")
col1, col2 = st.columns(2)
with col1:
    st.session_state.avatars['assistant'] = st.selectbox(
        "AI Avatar", options=["🤗", "💬", "🤖"], index=0
    )
with col2:
    st.session_state.avatars['user'] = st.selectbox(
        "User Avatar", options=["👤", "👱‍♂️", "👨🏾", "👩", "👧🏾"], index=0
    )
# Reset Chat History
reset_history = st.button("Reset Chat History")

Initialize or reset chat history

if “chat_history” not in st.session_state or reset_history:
st.session_state.chat_history = [{“role”: “assistant”, “content”: st.session_state.starter_message}]

def get_response(system_message, chat_history, user_text,
eos_token_id=[‘User’], max_new_tokens=256, get_llm_hf_kws={}):
“”"
Generates a response from the chatbot model.
Args:
system_message (str): The system message for the conversation.
chat_history (list): The list of previous chat messages.
user_text (str): The user’s input text.
model_id (str, optional): The ID of the HuggingFace model to use.
eos_token_id (list, optional): The list of end-of-sentence token IDs.
max_new_tokens (int, optional): The maximum number of new tokens to generate.
get_llm_hf_kws (dict, optional): Additional keyword arguments for the get_llm_hf function.
Returns:
tuple: A tuple containing the generated response and the updated chat history.
“”"
# Set up the model
hf = get_llm_hf_inference(max_new_tokens=max_new_tokens, temperature=0.1)

# Create the prompt template
prompt = PromptTemplate.from_template(
    (
        "[INST] {system_message}"
        "\nCurrent Conversation:\n{chat_history}\n\n"
        "\nUser: {user_text}.\n [/INST]"
        "\nAI:"
    )
)
# Make the chain and bind the prompt
chat = prompt | hf.bind(skip_prompt=True) | StrOutputParser(output_key='content')

# Generate the response
response = chat.invoke(input=dict(system_message=system_message, user_text=user_text, chat_history=chat_history))
response = response.split("AI:")[-1]

# Update the chat history
chat_history.append({'role': 'user', 'content': user_text})
chat_history.append({'role': 'assistant', 'content': response})
return response, chat_history

Chat interface

chat_interface = st.container(border=True)
with chat_interface:
output_container = st.container()
st.session_state.user_text = st.chat_input(placeholder=“Enter your text here.”)

Display chat messages

with output_container:
# For every message in the history
for message in st.session_state.chat_history:
# Skip the system message
if message[‘role’] == ‘system’:
continue

    # Display the chat message using the correct avatar
    with st.chat_message(message['role'], 
                         avatar=st.session_state['avatars'][message['role']]):
        st.markdown(message['content'])

When the user enter new text:

if st.session_state.user_text:
    
    # Display the user's new message immediately
    with st.chat_message("user", 
                         avatar=st.session_state.avatars['user']):
        st.markdown(st.session_state.user_text)
        
    # Display a spinner status bar while waiting for the response
    with st.chat_message("assistant", 
                         avatar=st.session_state.avatars['assistant']):

        with st.spinner("Thinking..."):
            # Call the Inference API with the system_prompt, user text, and history
            response, st.session_state.chat_history = get_response(
                system_message=st.session_state.system_message, 
                user_text=st.session_state.user_text,
                chat_history=st.session_state.chat_history,
                max_new_tokens=st.session_state.max_response_length,
            )
            st.markdown(response)

##############################################
Here is the error:
TypeError: text_generation() got an unexpected keyword argument ‘token’

Traceback:

File "/app/src/streamlit_app.py", line 162, in <module>
    response, st.session_state.chat_history = get_response(File "/app/src/streamlit_app.py", line 121, in get_response
    response = chat.invoke(input=dict(system_message=system_message, user_text=user_text, chat_history=chat_history))File "/usr/local/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 3046, in invoke
    input_ = context.run(step.invoke, input_, config)File "/usr/local/lib/python3.9/site-packages/langchain_core/runnables/base.py", line 5434, in invoke
    return self.bound.invoke(File "/usr/local/lib/python3.9/site-packages/langchain_core/language_models/llms.py", line 389, in invoke
    self.generate_prompt(File "/usr/local/lib/python3.9/site-packages/langchain_core/language_models/llms.py", line 766, in generate_prompt
    return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)File "/usr/local/lib/python3.9/site-packages/langchain_core/language_models/llms.py", line 971, in generate
    return self._generate_helper(File "/usr/local/lib/python3.9/site-packages/langchain_core/language_models/llms.py", line 792, in _generate_helper
    self._generate(File "/usr/local/lib/python3.9/site-packages/langchain_core/language_models/llms.py", line 1544, in _generate
    self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)File "/usr/local/lib/python3.9/site-packages/langchain_huggingface/llms/huggingface_endpoint.py", line 318, in _call
    response_text = self.client.text_generation(

######################
Please help me!

John6666 · August 5, 2025, 10:38pm

I recommend updating the huggingface_hub library to the latest version and using max_new_tokens instead of max_length and huggingfacehub_api_token instead of token.

pip install -U huggingface_hub[hf_xet]

If you really need to use the old version of the code, I think there is a way to downgrade the huggingface_hub library.

pip install huggingface_hub<0.25

OctopusMode · August 6, 2025, 5:20pm

It still doesn’t work, I get additional error, and it took me so long to build and rebuild app.py on Hugging Face so I’m trying something else now. Thank you!

Topic		Replies	Views
Using Langchain ChatHuggingface with Text Generation Inference: missing field `inputs` Beginners	2	755	August 8, 2024
TypeError: InferenceClient.text_generation() got an unexpected keyword argument Beginners	1	118	June 28, 2025
TypeError: InferenceClient.text_generation() got an unexpected keyword argument 'token' Beginners	5	180	June 10, 2025
Facing issue using a model hosted on HuggingFace Server and talking to it using API_KEY Beginners	7	482	May 12, 2025
Issues regarding using model google t-5 large 🤗Datasets	1	210	June 24, 2024