import json  # needed for json.dumps when building the request payload

max_token_limit = 1024 # Maximum token limit of the language model
window_size = 256 # Size of each context window
overlap = 50 # Number of overlapping tokens between consecutive windows
My input and prompt are lengthy, so I'm trying to use a context window to pass less input at a time because of the Falcon model's token limit. Can you help with the best way to do this?
input_prompt = f"Prompt: {prompt} \n\n\n\n for input: {input_text}"
# Tokenize the input text and prompt
input_tokens = tokenizer.encode(input_text, add_special_tokens=False)
prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
# Calculate the maximum number of tokens available for the input text
available_tokens = max_token_limit - len(prompt_tokens)
# Determine the number of windows needed based on the available tokens
# Stride by (window_size - overlap) so consecutive windows overlap; window_size
# should stay below available_tokens so prompt + window fit within the limit
num_windows = (len(input_tokens) - 1) // (window_size - overlap) + 1
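# Example with hypothetical numbers: 2000 input tokens with window_size = 256 and
# overlap = 50 give a stride of 206 tokens, so (2000 - 1) // 206 + 1 = 10 windows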
output_text = "" # Variable to store the generated output
# Process each context window
for i in range(num_windows):
    # Calculate the start and end positions for the current window
    start_pos = i * (window_size - overlap)
    end_pos = min(start_pos + window_size, len(input_tokens))
    # Extract the tokens for the current window
    window_tokens = input_tokens[start_pos:end_pos]
    # Concatenate the prompt tokens and the current window tokens
    tokens = prompt_tokens + window_tokens
    # Prepare the input data in the required format for the predict() method
    input_data = {
        "inputs": input_prompt,
        "parameters": {
            "temperature": 0.9,
            "top_p": 0.9,
            "max_new_tokens": 1024,
            "stop": ["<|endoftext|>", "</s>"],
            "do_sample": True
        }
    }
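    # Note: as far as I understand, the endpoint expects "inputs" to be a plain
    # string, so the token IDs in `tokens` above cannot be passed here directly.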
    # Convert the input_data dictionary to JSON and call the deployed endpoint
    encoded_input_data = json.dumps(input_data).encode("utf-8")
    predictions = predictor.predict(encoded_input_data)
    output_tokens = predictions['predicted_tokens']
    # Decode the output tokens using the tokenizer
    output_text += tokenizer.decode(output_tokens)

print(output_text)
This piece of code is failing because the payload doesn't match what the endpoint expects: it accepts strings only, but I want to pass tokens so that I can use a context window.
Can someone suggest how I can leverage the context window here?
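To make the intent clearer, here is a minimal sketch of what I am trying to achieve, assuming the endpoint only accepts text: I decode each window's token IDs back into a string and send that string as "inputs". The response parsing (`generated_text`) is an assumption based on the usual Hugging Face LLM container output, since I'm not sure about the exact response format; `tokenizer`, `predictor`, `prompt` and `input_text` are the same objects as in the code above.

# Minimal sketch (assumptions noted above): decode each token window back to text
# and send the text to the endpoint, since it does not accept raw token IDs
import json

def generate_with_windows(prompt, input_text, tokenizer, predictor,
                          window_size=256, overlap=50):
    prompt_text = f"Prompt: {prompt}\n\nfor input: "
    input_tokens = tokenizer.encode(input_text, add_special_tokens=False)
    stride = window_size - overlap
    output_text = ""
    for start in range(0, len(input_tokens), stride):
        window_tokens = input_tokens[start:start + window_size]
        # Turn the window back into a string because the endpoint expects text
        window_text = tokenizer.decode(window_tokens)
        payload = {
            "inputs": prompt_text + window_text,
            "parameters": {
                "temperature": 0.9,
                "top_p": 0.9,
                "max_new_tokens": 1024,
                "stop": ["<|endoftext|>", "</s>"],
                "do_sample": True
            }
        }
        response = predictor.predict(json.dumps(payload).encode("utf-8"))
        # Assumption: the endpoint returns JSON like [{"generated_text": "..."}]
        result = json.loads(response)
        output_text += result[0]["generated_text"]
    return output_text

The overlap is there so consecutive windows share some tokens and the model keeps a bit of context across window boundaries. Is this a reasonable way to structure it, or is there a better pattern for windowed input with the Falcon endpoint?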