I got it up and running by doing it slightly differently:
def infer_async(input_location="s3://async-inf/input.json"):
    """Invoke a SageMaker asynchronous-inference endpoint.

    Parameters
    ----------
    input_location : str
        S3 URI of the request payload. Should be JSON describing the
        input audio file (example in the 02_deploy_whisper-Async.ipynb
        notebook).

    Returns
    -------
    dict
        The raw ``invoke_endpoint_async`` response (includes the output
        location where SageMaker will write the inference result).
    """
    sagemaker_runtime = boto3.client("sagemaker-runtime")

    # NOTE(review): `endpoint_name` is read from module scope — it must be
    # defined before this function is called. The endpoint name must be
    # unique within an AWS Region in your AWS account. After you deploy a
    # model using SageMaker hosting services, client applications use this
    # API to get inferences from the model hosted at that endpoint.
    response = sagemaker_runtime.invoke_endpoint_async(
        EndpointName=endpoint_name,
        # ContentType='audio/mpeg',  # content type is implied by the JSON payload
        InputLocation=input_location,
    )
    print(response)
    return response