I tried using the max_length parameter, and it does limit the length, but the last sentence is usually incomplete. I also tried giving instructions in the context and input parameters, but that didn’t help.
code below:
import requests
# Set the Hugging Face Inference API endpoint
api_endpoint = "https://api-inference.huggingface.co/models/gpt2"
# Set the token
api_token = "MY_TOKEN"
# Set the prompt
prompt = "What is the meaning of life?"
# Seconds to wait for the API before giving up; without a timeout,
# requests.post can block indefinitely on a stalled connection.
request_timeout = 30
# Set the headers with the authorization token
headers = {
    "Authorization": f"Bearer {api_token}",
    "Content-Type": "application/json",
}
# Set the data to be sent in the request
data = {
    "inputs": prompt,
    "parameters": {
        "repetition_penalty": 4.0,
        # Caps the output length only; generation stops wherever the limit
        # falls, so the text is trimmed to a sentence boundary below.
        "max_length": 128,
    },
}


def _trim_to_last_sentence(text):
    """Return *text* cut just after its last complete sentence.

    A sentence is considered complete when it ends in ``.``, ``!`` or ``?``
    (optionally followed by closing quotes or brackets). If *text* contains
    no such punctuation it is returned unchanged, so nothing is ever lost
    entirely.
    """
    cut = max(text.rfind(mark) for mark in ".!?")
    if cut == -1:
        return text
    end = cut + 1
    # Keep closing quotes/brackets that belong to the finished sentence.
    while end < len(text) and text[end] in "\"')]":
        end += 1
    return text[:end]


# Make a POST request to the Hugging Face Inference API
try:
    response = requests.post(
        api_endpoint,
        json=data,
        headers=headers,
        timeout=request_timeout,
    )
except requests.RequestException as exc:
    # Network failure or timeout: stop with a readable message.
    raise SystemExit(f"Error: request failed\n{exc}") from exc

# Check if the request was successful
if response.status_code == 200:
    json_response = response.json()
    # Only index into the payload once its shape has been confirmed to be a
    # non-empty list of dicts carrying "generated_text".
    if (
        isinstance(json_response, list)
        and json_response
        and isinstance(json_response[0], dict)
        and "generated_text" in json_response[0]
    ):
        # Drop the trailing incomplete sentence left by the length limit
        print(_trim_to_last_sentence(json_response[0]["generated_text"]))
    else:
        print(f"Error: unexpected response format\n{response.text}")
else:
    # Print an error message if the request failed
    print(f"Error: {response.status_code}\n{response.text}")