Disable determinism in inference API text generation

According to the Inference API documentation, setting the option "use_cache" to false should stop the API from returning cached (and therefore deterministic) output, but that is not what I observe.

def huggingface_api_completion(prompt: str) -> str:
    """Request a sampled text completion for *prompt* from the HF Inference API.

    Sends a text-generation request with sampling enabled (``do_sample``,
    ``top_k``, ``top_p``) and ``use_cache: False`` so identical prompts are
    re-generated instead of served from the API-side cache.

    Args:
        prompt: The text to complete.

    Returns:
        The generated continuation, stripped of surrounding whitespace.

    Raises:
        KeyError/IndexError: if the API responds with an error object
            (e.g. model loading / auth failure) instead of a list of
            generations.
    """
    API_URL = f"https://api-inference.huggingface.co/models/{MODEL_V2}"
    headers = {"Authorization": f"Bearer {os.environ.get('HUGGINGFACE_KEY')}"}

    # NOTE(review): the original snippet was syntactically invalid — the
    # payload dict passed to json.dumps was missing its braces, and the
    # "parameters"/"options" sub-dicts were never closed. Build the payload
    # explicitly so json.dumps receives one serializable object.
    payload = {
        "inputs": prompt,
        "parameters": {
            # Rough budget: ~1 token per 5 chars of prompt, plus headroom.
            "max_length": round(len(prompt) / 5) + 100,
            "num_return_sequences": 1,
            "return_text": False,
            "return_full_text": False,
            # Sampling is what makes repeated calls non-deterministic.
            "do_sample": True,
            "top_k": 50,
            "top_p": 0.95,
            "end_sequence": "\n",
        },
        "options": {
            "wait_for_model": True,
            # Without this the API may replay a cached response for an
            # identical prompt, which looks like deterministic output.
            "use_cache": False,
        },
    }
    data = json.dumps(payload)

    response = requests.request("POST", API_URL, headers=headers, data=data)
    result = json.loads(response.content.decode("utf-8"))
    return result[0]["generated_text"].strip()

How can I enforce non-deterministic outputs? When running inference locally I would vary the random seed, but I don't see a way to do that through the hosted API.