In order to enhance the efficiency of some complex prompts in one of my apps, I took a look at breaking up the prompts into multiple requests. While it is certainly possible to serially process those prompts, I had heard of doing asynchronous processing, i.e. sending multiple prompt requests to the GPT API all at once. It took a lot of research and trial and error to track down and piece together the solution, so I thought I would post it here to share it and save everyone some time and effort. Enjoy! (And yes, the code could be more elegant, so I certainly look forward to suggestions on that!)
import os
import asyncio
from openai import AsyncOpenAI
from dotenv import load_dotenv
import json
import gradio as gr
from datetime import datetime, timedelta, date
# Load environment variables (expects OPENAI_API_KEY in a .env file).
load_dotenv()

# Async client: required so multiple chat-completion requests can be awaited
# concurrently instead of serially.
client = AsyncOpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

# Demo prompts. Each asks the model to wrap its answer in a JSON object with a
# 'prompt_name' key so the caller can tell concurrent responses apart.
prompt1 = "write a 5 paragraph explanation of how to use python async and await. Return a JSON structure as follows {'prompt_name': 'prompt1','response': '[response]'}"
# Fixed: the '[response]' placeholder below was originally '[response}' (mismatched
# bracket), inconsistent with the other three prompts.
prompt2 = "write a 5 paragraph explanation of limitations for using asyncio.run(). Return a JSON structure as follows {'prompt_name': 'prompt2','response': '[response]'}"
prompt3 = "write a 5 paragraph explanation of how to use asyncio.get_running_loop(). Return a JSON structure as follows {'prompt_name': 'prompt3','response': '[response]'}"
prompt4 = "write a 5 paragraph explanation of how to use asyncio.gather(). Return a JSON structure as follows {'prompt_name': 'prompt4','response': '[response]'}"
async def generate_response(prompt, prompt_name):
    """Send one chat-completion request and return the raw API response.

    Args:
        prompt: User-message text. Must ask the model for a JSON object
            (json_object response_format requires the word "JSON" in the prompt).
        prompt_name: Label used only in log output, to tell concurrent
            requests apart.

    Returns:
        The ChatCompletion response object on success, or None if the request
        failed, the model returned no text, or the text was not valid JSON.
    """
    try:
        print(f"Generating response for {prompt_name}")
        response = await client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model="gpt-4-turbo-preview",
            # json_object mode makes the model emit a single valid JSON object.
            response_format={"type": "json_object"},
            temperature=0,
        )
        gpt_response = response.choices[0].message.content
        # content can be None (e.g. refusals); treat that like a parse failure
        # so callers never see a response whose JSON cannot be loaded.
        if not isinstance(gpt_response, str):
            print(f"No text content in response for {prompt_name}")
            return None
        try:
            data = json.loads(gpt_response)
        except json.JSONDecodeError:
            print(f"Failed to parse JSON response for {prompt_name}")
            return None
        returned_prompt = data.get('prompt_name', 'Unknown')
        print(f"Got response for {returned_prompt} for {prompt_name} request")
        return response
    except Exception as e:
        # Top-level boundary: log and degrade to None so one failed prompt
        # does not abort the whole asyncio.gather() batch.
        print(f"An error occurred while generating response for {prompt_name}: {e}")
        return None
async def queue_api_calls(prompt1a, prompt2a, prompt3a, prompt4a):
    """Fan out the four prompts concurrently and map results to the UI textboxes.

    Args:
        prompt1a..prompt4a: Prompt strings, one per output textbox.

    Returns:
        A dict mapping each module-level Gradio response component to a
        gr.Textbox carrying that prompt's extracted 'response' text (or an
        error message if the request or JSON parsing failed).
    """
    responses = await asyncio.gather(
        generate_response(prompt1a, "Prompt 1"),
        generate_response(prompt2a, "Prompt 2"),
        generate_response(prompt3a, "Prompt 3"),
        generate_response(prompt4a, "Prompt 4"),
    )
    # Although the API calls complete in arbitrary order, asyncio.gather
    # returns results in the order the awaitables were passed in.
    results = []
    for response in responses:
        # generate_response returns None on failure. Always append exactly one
        # entry per prompt so results[0..3] below can never raise IndexError
        # and each message lands in the textbox for the prompt that failed.
        if response is None:
            results.append("Error: no response received for this prompt.")
            continue
        try:
            data = json.loads(response.choices[0].message.content)
        except (json.JSONDecodeError, TypeError):
            results.append("Error: could not parse the model's JSON response.")
            continue
        response_text = data.get('response')
        results.append(response_text if response_text
                       else "Error: response JSON is missing the 'response' key.")
    return {prompt1_response: gr.Textbox(value=results[0]),
            prompt2_response: gr.Textbox(value=results[1]),
            prompt3_response: gr.Textbox(value=results[2]),
            prompt4_response: gr.Textbox(value=results[3])}
def asyncchat(prompt1a, prompt2a, prompt3a, prompt4a):
    """Synchronous Gradio callback that drives the async prompt fan-out.

    Gradio invokes this from a worker thread, so we need an event loop to run
    the coroutine to completion. We install one loop in this thread on first
    use and reuse it on every subsequent click: the AsyncOpenAI client's HTTP
    transport binds to the loop it first runs on, and the original version's
    new-loop-per-call approach also leaked an unclosed event loop each time.
    (asyncio.run() would create and close a fresh loop per call, which breaks
    the reused client on the second call.)

    Returns:
        The component->gr.Textbox dict produced by queue_api_calls().
    """
    try:
        # Reuse the loop previously installed in this thread, if any.
        loop = asyncio.get_event_loop()
        if loop.is_closed():
            raise RuntimeError("thread's event loop is closed")
    except RuntimeError:
        # No usable loop in this thread yet: create one and install it so the
        # next call finds and reuses it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    # Block until all four API calls complete; returns the dict of
    # component -> gr.Textbox(value=...) built by queue_api_calls().
    return loop.run_until_complete(queue_api_calls(prompt1a, prompt2a, prompt3a, prompt4a))
# --- Gradio UI: four editable prompt boxes, one submit button, four outputs. ---
with gr.Blocks() as demo:
    with gr.Row():
        # NOTE(review): gr.Column(0) passes 0 positionally; presumably scale=0
        # was intended -- confirm against the installed Gradio version, since
        # newer releases make Column's parameters keyword-only.
        with gr.Column(0):
            # Inputs, pre-filled with the demo prompts defined at module level.
            prompt1_tbox = gr.Textbox(label="prompt 1",value=prompt1)
            prompt2_tbox = gr.Textbox(label="prompt 2", value=prompt2)
            prompt3_tbox = gr.Textbox(label="prompt 3", value=prompt3)
            prompt4_tbox = gr.Textbox(label="prompt 4", value=prompt4)
            submit_btn = gr.Button(value='Submit')
            # Outputs: asyncchat() returns a dict keyed by these components.
            prompt1_response = gr.Textbox(label="Prompt 1 Response")
            prompt2_response = gr.Textbox(label="Prompt 2 Response")
            prompt3_response = gr.Textbox(label="Prompt 3 Response")
            prompt4_response = gr.Textbox(label="Prompt 4 Response")
    # One click fans all four prompts out concurrently via asyncchat().
    submit_btn.click(fn=asyncchat,inputs=[prompt1_tbox,prompt2_tbox,prompt3_tbox,prompt4_tbox],outputs=[prompt1_response, prompt2_response,prompt3_response,prompt4_response])

if __name__ == "__main__":
    demo.launch()