Using a context_window in the request parameters

max_token_limit = 1024  # Maximum token limit of the language model
window_size = 256  # Size of each context window
overlap = 50  # Number of overlapping tokens between consecutive windows
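
# Sanity check of the window arithmetic (hypothetical numbers): each window
# advances by window_size - overlap = 206 tokens, so a 1,000-token input
# needs ceil(1000 / 206) = 5 overlapping windows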

My input and prompt are lengthy, so I'm trying to use a context window to pass less input at a time, because of the token limit of the Falcon model. Can you help with the best approach here?

input_prompt = f"Prompt: {prompt} \n\n\n\n for input: {input_text}"

# Tokenize the input text and prompt
input_tokens = tokenizer.encode(input_text, add_special_tokens=False)

prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
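
# (Assumes `tokenizer` matches the deployed model, e.g.
#  AutoTokenizer.from_pretrained("tiiuae/falcon-7b-instruct"))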

# Calculate the maximum number of tokens available for the input text
# (the prompt is prepended to every window, so prompt + window must fit)
available_tokens = max_token_limit - len(prompt_tokens)
assert window_size <= available_tokens, "window_size must leave room for the prompt"

# Determine the number of windows needed; each window advances by
# window_size - overlap tokens, so consecutive windows share `overlap` tokens
stride = window_size - overlap
num_windows = (len(input_tokens) - 1) // stride + 1

output_text = ""  # Variable to store the generated output

# Process each context window
for i in range(num_windows):
    # Calculate the start and end positions for the current window
    start_pos = i * stride
    end_pos = min(start_pos + window_size, len(input_tokens))

    # Extract the tokens for the current window
    window_tokens = input_tokens[start_pos:end_pos]

    # Concatenate the prompt tokens and the current window tokens: this is
    # the sequence I actually want to send, but predict() below never uses it
    tokens = prompt_tokens + window_tokens
    
    # Prepare the input payload in the format predict() expects; note that
    # this sends the full input_prompt string, not the current window
    input_data = {
        "inputs": input_prompt,
        "parameters": {
            "temperature": 0.9,
            "top_p": 0.9,
            "max_new_tokens": 1024,
            "stop": ["<|endoftext|>", "</s>"],
            "do_sample": True
        }
    }
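    # (These generation parameters -- temperature, top_p, max_new_tokens,
    # stop, do_sample -- follow the Hugging Face text-generation interface
    # and should be accepted as long as "inputs" is a plain string)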
    
    # Make predictions using the deployed endpoint; if the predictor has no
    # JSON serializer attached, encode the payload first instead:
    #   predictions = predictor.predict(json.dumps(input_data).encode("utf-8"))
    predictions = predictor.predict(input_data)

    # I expected token IDs back, but this is where things break down
    output_tokens = predictions['predicted_tokens']

    # Decode the output tokens using the tokenizer
    output_text += tokenizer.decode(output_tokens)
 
print(output_text)

This piece of code is failing because the parameters I want to pass don't match the endpoint's expectations: it accepts strings only, but I want to pass tokens so that I can use a context_window!

Can someone suggest how I can leverage a context_window here?
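
One way to keep the windowing while satisfying the string-only interface is to decode each window's tokens back into text with tokenizer.decode before sending it. Below is a minimal sketch, assuming a SageMaker predictor with JSON (de)serialization and a Hugging Face LLM (TGI) container that returns [{"generated_text": ...}]; verify the response shape against your own endpoint, and note the helper name generate_with_windows is just illustrative:

def generate_with_windows(predictor, tokenizer, prompt, input_text,
                          window_size=256, overlap=50):
    """Chunk input_text into overlapping token windows and query the
    endpoint once per window, sending plain strings only."""
    input_tokens = tokenizer.encode(input_text, add_special_tokens=False)
    stride = window_size - overlap
    output_text = ""

    for start in range(0, max(1, len(input_tokens)), stride):
        window_tokens = input_tokens[start:start + window_size]
        # Decode the window back to text: the endpoint only accepts strings
        window_text = tokenizer.decode(window_tokens)

        payload = {
            "inputs": f"Prompt: {prompt}\n\nfor input: {window_text}",
            "parameters": {
                "temperature": 0.9,
                "top_p": 0.9,
                "max_new_tokens": 1024,
                "stop": ["<|endoftext|>", "</s>"],
                "do_sample": True,
            },
        }
        response = predictor.predict(payload)

        # TGI-style response: [{"generated_text": "..."}]; adjust if your
        # container returns a different shape
        output_text += response[0]["generated_text"]

        if start + window_size >= len(input_tokens):
            break  # this window already reached the end of the input

    return output_text

output_text = generate_with_windows(predictor, tokenizer, prompt, input_text)
print(output_text)

The key point is that the windowing happens client-side in token space, but what crosses the wire is always a decoded string, so no change to the endpoint's parameters is needed. If the generation has to stay coherent across windows, you can also append the previous window's output to the next prompt, at the cost of tokens from your budget.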