 2. Streaming transcription using raw HTTP request to the vLLM server.
 """
 
+import argparse
 import asyncio
 
 from openai import AsyncOpenAI, OpenAI
 
 from vllm.assets.audio import AudioAsset
 
 
-def sync_openai(audio_path: str, client: OpenAI):
+def sync_openai(audio_path: str, client: OpenAI, model: str):
     """
     Perform synchronous transcription using OpenAI-compatible API.
     """
     with open(audio_path, "rb") as f:
         transcription = client.audio.transcriptions.create(
             file=f,
-            model="openai/whisper-large-v3",
+            model=model,
             language="en",
             response_format="json",
             temperature=0.0,
@@ -42,18 +43,18 @@ def sync_openai(audio_path: str, client: OpenAI):
                 repetition_penalty=1.3,
             ),
         )
-    print("transcription result:", transcription.text)
+    print("transcription result [sync]:", transcription.text)
4647
4748
-async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
+async def stream_openai_response(audio_path: str, client: AsyncOpenAI, model: str):
     """
     Perform asynchronous transcription using OpenAI-compatible API.
     """
-    print("\ntranscription result:", end=" ")
+    print("\ntranscription result [stream]:", end=" ")
     with open(audio_path, "rb") as f:
         transcription = await client.audio.transcriptions.create(
             file=f,
-            model="openai/whisper-large-v3",
+            model=model,
             language="en",
             response_format="json",
             temperature=0.0,
@@ -72,7 +73,47 @@ async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
     print()  # Final newline after stream ends
 
 
-def main():
+def stream_api_response(audio_path: str, model: str, openai_api_base: str):
+    """
+    Perform streaming transcription using raw HTTP requests to the vLLM API server.
+    """
+    import json
+    import os
+
+    import requests
+
+    api_url = f"{openai_api_base}/audio/transcriptions"
+    headers = {"User-Agent": "Transcription-Client"}
+    with open(audio_path, "rb") as f:
+        files = {"file": (os.path.basename(audio_path), f)}
+        data = {
+            "stream": "true",
+            "model": model,
+            "language": "en",
+            "response_format": "json",
+        }
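+        # NOTE: "stream": "true" asks the server to reply with a server-sent
+        # events (SSE) stream of "data: {json}" events rather than a single
+        # JSON body.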
+
+        print("\ntranscription result [stream]:", end=" ")
+        response = requests.post(
+            api_url, headers=headers, files=files, data=data, stream=True
+        )
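+        # SSE events are separated by blank lines, hence the b"\n\n"
+        # delimiter below; each non-empty event starts with "data: ".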
+        for chunk in response.iter_lines(
+            chunk_size=8192, decode_unicode=False, delimiter=b"\n\n"
+        ):
+            if chunk:
+                # Strip the "data: " prefix and decode the JSON payload.
+                payload = json.loads(chunk[len("data: ") :].decode("utf-8"))
+                choice = payload["choices"][0]
+                print(choice["delta"]["content"], end="", flush=True)
+
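+                # The final chunk carries a finish_reason; breaking on it also
+                # avoids handing the terminating "data: [DONE]" sentinel (if
+                # the server emits one, per OpenAI convention) to json.loads.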
+                finish_reason = choice.get("finish_reason")
+                if finish_reason is not None:
+                    print(f"\n[Stream finished, reason: {finish_reason}]")
+                    break
+
+
+def main(args):
     mary_had_lamb = str(AudioAsset("mary_had_lamb").get_local_path())
     winning_call = str(AudioAsset("winning_call").get_local_path())
78119
@@ -84,14 +125,41 @@ def main():
         base_url=openai_api_base,
     )
 
-    sync_openai(mary_had_lamb, client)
+    # Ask the server which model it is serving instead of hard-coding one.
+    model = client.models.list().data[0].id
+    print(f"Using model: {model}")
+
+    # Run the synchronous function
+    sync_openai(args.audio_path if args.audio_path else mary_had_lamb, client, model)
+
     # Run the asynchronous function
-    client = AsyncOpenAI(
-        api_key=openai_api_key,
-        base_url=openai_api_base,
-    )
-    asyncio.run(stream_openai_response(winning_call, client))
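+    # OpenAI Whisper checkpoints ("openai/whisper-*") stream through the
+    # OpenAI SDK; for other models, fall back to a raw HTTP SSE request.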
+    if "openai" in model:
+        client = AsyncOpenAI(
+            api_key=openai_api_key,
+            base_url=openai_api_base,
+        )
+        asyncio.run(
+            stream_openai_response(
+                args.audio_path if args.audio_path else winning_call, client, model
+            )
+        )
+    else:
+        stream_api_response(
+            args.audio_path if args.audio_path else winning_call,
+            model,
+            openai_api_base,
+        )
94151
95152
 if __name__ == "__main__":
-    main()
+    # Set up the argument parser
+    parser = argparse.ArgumentParser(
+        description="OpenAI Transcription Client using vLLM API Server"
+    )
+    parser.add_argument(
+        "--audio_path",
+        type=str,
+        default=None,
+        help="The path to the audio file to transcribe.",
+    )
+    args = parser.parse_args()
+    main(args)
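+    # Example invocation (script name illustrative; assumes a vLLM server
+    # with an audio model is already running):
+    #   python transcription_client.py --audio_path /path/to/audio.wav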