1919"""
2020
2121import asyncio
22- import json
2322
24- import httpx
25- from openai import OpenAI
23+ from openai import AsyncOpenAI , OpenAI
2624
2725from vllm .assets .audio import AudioAsset
2826
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
4745 print ("transcription result:" , transcription .text )
4846
4947
async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
    """
    Perform asynchronous streaming transcription using the OpenAI-compatible API.

    Args:
        audio_path: Path to the audio file to transcribe.
        client: ``AsyncOpenAI`` client pointed at the vLLM server.
    """
    print("\ntranscription result:", end=" ")
    with open(audio_path, "rb") as f:
        transcription = await client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-large-v3",
            language="en",
            response_format="json",
            temperature=0.0,
            # Additional sampling params not provided by OpenAI API.
            extra_body=dict(
                seed=420,
                top_p=0.6,
            ),
            stream=True,
        )
    # The file is fully uploaded by the time ``create`` returns, so the
    # stream can be consumed after the file handle is closed.
    async for chunk in transcription:
        if chunk.choices:
            content = chunk.choices[0].get("delta", {}).get("content")
            # Guard against deltas that carry no text (e.g. the terminal
            # chunk); printing ``None`` would inject the literal string
            # "None" into the transcript output.
            if content:
                print(content, end="", flush=True)

    print()  # Final newline after stream ends
8374
@@ -95,7 +86,11 @@ def main():
9586
9687 sync_openai (mary_had_lamb , client )
9788 # Run the asynchronous function
98- asyncio .run (stream_openai_response (winning_call , openai_api_base , openai_api_key ))
89+ client = AsyncOpenAI (
90+ api_key = openai_api_key ,
91+ base_url = openai_api_base ,
92+ )
93+ asyncio .run (stream_openai_response (winning_call , client ))
9994
10095
10196if __name__ == "__main__" :
0 commit comments