diff --git a/examples/online_serving/openai_transcription_client.py b/examples/online_serving/openai_transcription_client.py
index 755038a76139..0d1d73fb14a3 100644
--- a/examples/online_serving/openai_transcription_client.py
+++ b/examples/online_serving/openai_transcription_client.py
@@ -19,10 +19,8 @@
 """
 
 import asyncio
-import json
 
-import httpx
-from openai import OpenAI
+from openai import AsyncOpenAI, OpenAI
 
 from vllm.assets.audio import AudioAsset
 
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
     print("transcription result:", transcription.text)
 
 
-async def stream_openai_response(audio_path: str, base_url: str, api_key: str):
+async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
     """
-    Perform streaming transcription using vLLM's raw HTTP streaming API.
+    Perform asynchronous transcription using OpenAI-compatible API.
     """
-    data = {
-        "language": "en",
-        "stream": True,
-        "model": "openai/whisper-large-v3",
-    }
-    url = base_url + "/audio/transcriptions"
-    headers = {"Authorization": f"Bearer {api_key}"}
-    print("transcription result:", end=" ")
-    # OpenAI Transcription API client does not support streaming.
-    async with httpx.AsyncClient() as client:
-        with open(audio_path, "rb") as f:
-            async with client.stream(
-                "POST", url, files={"file": f}, data=data, headers=headers
-            ) as response:
-                async for line in response.aiter_lines():
-                    # Each line is a JSON object prefixed with 'data: '
-                    if line:
-                        if line.startswith("data: "):
-                            line = line[len("data: ") :]
-                        # Last chunk, stream ends
-                        if line.strip() == "[DONE]":
-                            break
-                        # Parse the JSON response
-                        chunk = json.loads(line)
-                        # Extract and print the content
-                        content = chunk["choices"][0].get("delta", {}).get("content")
-                        print(content, end="")
+    print("\ntranscription result:", end=" ")
+    with open(audio_path, "rb") as f:
+        transcription = await client.audio.transcriptions.create(
+            file=f,
+            model="openai/whisper-large-v3",
+            language="en",
+            response_format="json",
+            temperature=0.0,
+            # Additional sampling params not provided by OpenAI API.
+            extra_body=dict(
+                seed=420,
+                top_p=0.6,
+            ),
+            stream=True,
+        )
+        async for chunk in transcription:
+            if chunk.choices:
+                content = chunk.choices[0].get("delta", {}).get("content")
+                print(content, end="", flush=True)
+    print()  # Final newline after stream ends
 
 
@@ -95,7 +86,11 @@ def main():
     sync_openai(mary_had_lamb, client)
 
     # Run the asynchronous function
-    asyncio.run(stream_openai_response(winning_call, openai_api_base, openai_api_key))
+    client = AsyncOpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+    asyncio.run(stream_openai_response(winning_call, client))
 
 
 if __name__ == "__main__":
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 3b5281962b2d..27d75b2f1fea 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
     timestamps incurs additional latency.
     """
 
-    # --8<-- [start:transcription-extra-params]
     stream: Optional[bool] = False
-    """Custom field not present in the original OpenAI definition. When set,
-    it will enable output to be streamed in a similar fashion as the Chat
-    Completion endpoint.
+    """When set, it will enable output to be streamed in a similar fashion
+    as the Chat Completion endpoint.
     """
+    # --8<-- [start:transcription-extra-params]
 
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
     stream_continuous_usage_stats: Optional[bool] = False
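
For reference, the flattened stream options kept as context at the end of the protocol hunk (stream_include_usage, stream_continuous_usage_stats) are not typed parameters of the OpenAI SDK, so a client would pass them through extra_body just like the sampling params in the updated example. A minimal sketch under those assumptions — the base URL, API key, and audio path are placeholders, and whether usage chunks are actually emitted depends on the server:

import asyncio

from openai import AsyncOpenAI


async def stream_with_usage(audio_path: str) -> None:
    # Placeholder endpoint/key for a locally served vLLM instance.
    client = AsyncOpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    with open(audio_path, "rb") as f:
        transcription = await client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-large-v3",
            language="en",
            response_format="json",
            stream=True,
            # Flattened stream options from TranscriptionRequest, sent as
            # plain form fields rather than a nested stream_options object.
            extra_body=dict(
                stream_include_usage=True,
                stream_continuous_usage_stats=True,
            ),
        )
        # Chunk handling mirrors the example above: usage-only chunks have
        # an empty choices list and are skipped here.
        async for chunk in transcription:
            if chunk.choices:
                content = chunk.choices[0].get("delta", {}).get("content")
                print(content, end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(stream_with_usage("path/to/audio.wav"))  # hypothetical path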