How to make multiple async calls to AsyncOpenAI and return results to Gradio UI

In order to enhance the efficiency of some complex prompts in one of my apps, I took a look at breaking up the prompts into multiple requests. While it is certainly possible to process those prompts serially, I had heard of asynchronous processing, i.e. sending multiple prompt requests to the GPT API all at once. It took a lot of research and trial and error to track down and piece together the solution, so I thought I would post it here to share it and save everyone some time and effort! Enjoy! (And yes, the code could be more elegant, so I certainly look forward to suggestions on that!)

import asyncio
import json
import os
import threading
from datetime import datetime, timedelta, date

import gradio as gr
from dotenv import load_dotenv
from openai import AsyncOpenAI

# Load variables from a local .env file (if present) into the process environment
# so that OPENAI_API_KEY can be read below.
load_dotenv()

# One async client shared by every request issued from this module.
client = AsyncOpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

# Default prompts shown in the UI. Each one asks the model to echo a
# 'prompt_name' and to put its answer under the 'response' key, which is the
# key the result handling reads back out of the returned JSON.
prompt1 = "write a 5 paragraph explanation of how to use python async and await. Return a JSON structure as follows {'prompt_name': 'prompt1','response': '[response]'}"
# Fixed: the placeholder previously read '[response}' (mismatched bracket),
# unlike the other three prompts.
prompt2 = "write a 5 paragraph explanation of limitations for using asyncio.run(). Return a JSON structure as follows {'prompt_name': 'prompt2','response': '[response]'}"
prompt3 = "write a 5 paragraph explanation of how to use asyncio.get_running_loop(). Return a JSON structure as follows {'prompt_name': 'prompt3','response': '[response]'}"
prompt4 = "write a 5 paragraph explanation of how to use asyncio.gather(). Return a JSON structure as follows {'prompt_name': 'prompt4','response': '[response]'}"

async def generate_response(prompt, prompt_name):
    """Send one prompt to the chat completions API and return the raw completion.

    Args:
        prompt: Text sent as the single user message.
        prompt_name: Label used only in the progress/error messages printed here.

    Returns:
        The completion object returned by the API, or None when the message
        content is a string that is not valid JSON or when any other exception
        is raised while handling the request.
    """
    try:
        print(f"Generating response for {prompt_name}")
        user_message = {"role": "user", "content": prompt}
        completion = await client.chat.completions.create(
            messages=[user_message],
            model="gpt-4-turbo-preview",
            response_format={"type": "json_object"},
            temperature=0,
        )

        content = completion.choices[0].message.content

        # The payload is decoded here only to log which prompt the model says
        # it answered; the caller receives the untouched completion object.
        if isinstance(content, dict):
            parsed = content
        elif isinstance(content, str):
            try:
                parsed = json.loads(content)
            except json.JSONDecodeError:
                print(f"Failed to parse JSON response for {prompt_name}")
                return None
        else:
            parsed = {}

        echoed_name = parsed.get('prompt_name', 'Unknown')
        print(f"Got response for {echoed_name} for {prompt_name} request")
        return completion
    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"An error occurred while generating response for {prompt_name}: {e}")
        return None

# Shown in a textbox when a request failed or returned no usable 'response'.
_NO_RESPONSE_TEXT = "(no response received)"


def _extract_response_text(response):
    """Return the 'response' field of one completion, or a placeholder on failure."""
    if response is None:
        # generate_response() returns None when the request or parsing failed.
        return _NO_RESPONSE_TEXT
    try:
        data = json.loads(response.choices[0].message.content)
    except (TypeError, ValueError, IndexError):
        # Content was None, not valid JSON, or there were no choices at all.
        return _NO_RESPONSE_TEXT
    if not isinstance(data, dict):
        return _NO_RESPONSE_TEXT
    return data.get('response') or _NO_RESPONSE_TEXT


async def queue_api_calls(prompt1a, prompt2a, prompt3a, prompt4a):
    """Run the four prompts concurrently and map each answer to its textbox.

    Args:
        prompt1a, prompt2a, prompt3a, prompt4a: Prompt texts to send.

    Returns:
        A dict keyed by the four response components, each mapped to a
        gr.Textbox carrying that prompt's answer. A prompt whose request
        failed, or whose JSON has no non-empty 'response' value, gets a
        placeholder message so the other answers stay in their own textboxes.
    """
    # The requests run concurrently, but asyncio.gather returns the results in
    # the same order as the awaitables passed in.
    responses = await asyncio.gather(
        generate_response(prompt1a, "Prompt 1"),
        generate_response(prompt2a, "Prompt 2"),
        generate_response(prompt3a, "Prompt 3"),
        generate_response(prompt4a, "Prompt 4"),
    )

    # Exactly one text per request, so position i always belongs to prompt i.
    texts = [_extract_response_text(response) for response in responses]

    output_boxes = (prompt1_response, prompt2_response,
                    prompt3_response, prompt4_response)
    return {box: gr.Textbox(value=text) for box, text in zip(output_boxes, texts)}

# Single long-lived event loop used for every call, created lazily.
_background_loop = None
_background_loop_lock = threading.Lock()


def _get_background_loop():
    """Return the shared event loop, starting its daemon thread on first use."""
    global _background_loop
    with _background_loop_lock:
        if _background_loop is None:
            loop = asyncio.new_event_loop()
            threading.Thread(
                target=loop.run_forever,
                name="asyncchat-loop",
                daemon=True,
            ).start()
            _background_loop = loop
        return _background_loop


def asyncchat(prompt1a, prompt2a, prompt3a, prompt4a):
    """Synchronous entry point that runs the four prompts and waits for the result.

    Args:
        prompt1a, prompt2a, prompt3a, prompt4a: Prompt texts to send.

    Returns:
        Whatever queue_api_calls() returns for these prompts.

    Raises:
        Any exception raised inside queue_api_calls() is re-raised here.
    """
    # Why a dedicated loop: loop.run_until_complete() raises RuntimeError when
    # called on a loop that is already running, and creating a fresh loop per
    # call leaves an unclosed loop behind each time. Submitting the coroutine
    # to one background loop works from any calling thread and can be repeated
    # any number of times.
    future = asyncio.run_coroutine_threadsafe(
        queue_api_calls(prompt1a, prompt2a, prompt3a, prompt4a),
        _get_background_loop(),
    )
    # Block the calling thread until all API calls have completed.
    return future.result()

# Build the UI: four editable prompt boxes, a Submit button, and four boxes
# that receive the answers.
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE(review): Column is given a bare positional 0 here; which
        # parameter that binds to depends on the installed Gradio version -
        # confirm the intent (scale?) and prefer a keyword argument.
        with gr.Column(0):
            # Inputs, pre-filled with the module-level default prompts.
            prompt1_tbox = gr.Textbox(label="prompt 1",value=prompt1)
            prompt2_tbox = gr.Textbox(label="prompt 2", value=prompt2)
            prompt3_tbox = gr.Textbox(label="prompt 3", value=prompt3)
            prompt4_tbox = gr.Textbox(label="prompt 4", value=prompt4)
            submit_btn = gr.Button(value='Submit')
            # Outputs. queue_api_calls() refers to these module-level names as
            # the keys of the dict it returns, so they must keep these names.
            prompt1_response = gr.Textbox(label="Prompt 1 Response")
            prompt2_response = gr.Textbox(label="Prompt 2 Response")
            prompt3_response = gr.Textbox(label="Prompt 3 Response")
            prompt4_response = gr.Textbox(label="Prompt 4 Response")
            # Clicking Submit passes the four prompt texts to asyncchat() and
            # routes its return value to the four response boxes.
            submit_btn.click(fn=asyncchat,inputs=[prompt1_tbox,prompt2_tbox,prompt3_tbox,prompt4_tbox],outputs=[prompt1_response, prompt2_response,prompt3_response,prompt4_response])

# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
1 Like