
Commit 11bb3f4

NickLucche authored and jinzhen-lin committed
[Docs] Update transcriptions API to use openai client with stream=True (vllm-project#20271)
Signed-off-by: NickLucche <[email protected]>
Signed-off-by: Jinzhen Lin <[email protected]>
1 parent 502a191 commit 11bb3f4

File tree

2 files changed (+31, −37 lines)

examples/online_serving/openai_transcription_client.py

Lines changed: 28 additions & 33 deletions
```diff
@@ -19,10 +19,8 @@
 """
 
 import asyncio
-import json
 
-import httpx
-from openai import OpenAI
+from openai import AsyncOpenAI, OpenAI
 
 from vllm.assets.audio import AudioAsset
 
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
     print("transcription result:", transcription.text)
 
 
-async def stream_openai_response(audio_path: str, base_url: str, api_key: str):
+async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
     """
-    Perform streaming transcription using vLLM's raw HTTP streaming API.
+    Perform asynchronous transcription using OpenAI-compatible API.
     """
-    data = {
-        "language": "en",
-        "stream": True,
-        "model": "openai/whisper-large-v3",
-    }
-    url = base_url + "/audio/transcriptions"
-    headers = {"Authorization": f"Bearer {api_key}"}
-    print("transcription result:", end=" ")
-    # OpenAI Transcription API client does not support streaming.
-    async with httpx.AsyncClient() as client:
-        with open(audio_path, "rb") as f:
-            async with client.stream(
-                "POST", url, files={"file": f}, data=data, headers=headers
-            ) as response:
-                async for line in response.aiter_lines():
-                    # Each line is a JSON object prefixed with 'data: '
-                    if line:
-                        if line.startswith("data: "):
-                            line = line[len("data: ") :]
-                        # Last chunk, stream ends
-                        if line.strip() == "[DONE]":
-                            break
-                        # Parse the JSON response
-                        chunk = json.loads(line)
-                        # Extract and print the content
-                        content = chunk["choices"][0].get("delta", {}).get("content")
-                        print(content, end="")
+    print("\ntranscription result:", end=" ")
+    with open(audio_path, "rb") as f:
+        transcription = await client.audio.transcriptions.create(
+            file=f,
+            model="openai/whisper-large-v3",
+            language="en",
+            response_format="json",
+            temperature=0.0,
+            # Additional sampling params not provided by OpenAI API.
+            extra_body=dict(
+                seed=420,
+                top_p=0.6,
+            ),
+            stream=True,
+        )
+        async for chunk in transcription:
+            if chunk.choices:
+                content = chunk.choices[0].get("delta", {}).get("content")
+                print(content, end="", flush=True)
+
     print()  # Final newline after stream ends
 
 
@@ -95,7 +86,11 @@ def main():
 
     sync_openai(mary_had_lamb, client)
     # Run the asynchronous function
-    asyncio.run(stream_openai_response(winning_call, openai_api_base, openai_api_key))
+    client = AsyncOpenAI(
+        api_key=openai_api_key,
+        base_url=openai_api_base,
+    )
+    asyncio.run(stream_openai_response(winning_call, client))
 
 
 if __name__ == "__main__":
```
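Taken together, the example's new pattern is: build an `AsyncOpenAI` client pointed at the vLLM server, pass `stream=True` to `audio.transcriptions.create`, and iterate the returned chunks. Below is a minimal, self-contained sketch of that flow; the server URL, `api_key="EMPTY"`, and the `audio.wav` path are illustrative assumptions, and the chunk access mirrors the example above.

```python
# Minimal sketch of the pattern this commit introduces: streaming
# transcription through the official openai client instead of raw httpx.
# Assumes a vLLM server at http://localhost:8000/v1 serving
# openai/whisper-large-v3 and a local audio file; adjust both as needed.
import asyncio

from openai import AsyncOpenAI


async def transcribe_streaming(audio_path: str) -> None:
    client = AsyncOpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    with open(audio_path, "rb") as f:
        # stream=True is a vLLM extension to the Transcription API; it yields
        # chat-completion-style chunks instead of a single response object.
        transcription = await client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-large-v3",
            language="en",
            response_format="json",
            temperature=0.0,
            stream=True,
        )
        async for chunk in transcription:
            if chunk.choices:
                # Chunk shape follows the example above: a delta with content.
                delta = chunk.choices[0].get("delta", {})
                print(delta.get("content") or "", end="", flush=True)
    print()


if __name__ == "__main__":
    asyncio.run(transcribe_streaming("audio.wav"))  # hypothetical file name
```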

vllm/entrypoints/openai/protocol.py

Lines changed: 3 additions & 4 deletions
```diff
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
     timestamps incurs additional latency.
     """
 
-    # --8<-- [start:transcription-extra-params]
     stream: Optional[bool] = False
-    """Custom field not present in the original OpenAI definition. When set,
-    it will enable output to be streamed in a similar fashion as the Chat
-    Completion endpoint.
+    """When set, it will enable output to be streamed in a similar fashion
+    as the Chat Completion endpoint.
     """
+    # --8<-- [start:transcription-extra-params]
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
     stream_continuous_usage_stats: Optional[bool] = False
```
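The flattened fields exist so stream options can be sent as plain multipart form values rather than a nested `stream_options` object. A hedged sketch of how a client could set them through the openai client's `extra_body` (which merges extra fields into the request body, the same escape hatch the example uses for `seed` and `top_p`); the server URL, file name, and the shape of the final usage chunk are assumptions:

```python
# Sketch: requesting usage stats on a streamed transcription via the
# flattened form fields defined in TranscriptionRequest above. Assumes a
# vLLM server at http://localhost:8000/v1; field names match protocol.py.
import asyncio

from openai import AsyncOpenAI


async def transcribe_with_usage(audio_path: str) -> None:
    # Assumed local vLLM endpoint; the API key is ignored by default.
    client = AsyncOpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")
    with open(audio_path, "rb") as f:
        transcription = await client.audio.transcriptions.create(
            file=f,
            model="openai/whisper-large-v3",
            stream=True,
            # Flattened stream options; vLLM extensions, so they travel
            # via extra_body as ordinary form fields.
            extra_body=dict(
                stream_include_usage=True,
                stream_continuous_usage_stats=False,
            ),
        )
        async for chunk in transcription:
            if chunk.choices:
                delta = chunk.choices[0].get("delta", {})
                print(delta.get("content") or "", end="", flush=True)
            # Assumption: with include_usage set, a final chunk carries token
            # counts and no choices, mirroring Chat Completions semantics.
            elif getattr(chunk, "usage", None):
                print(f"\nusage: {chunk.usage}")


asyncio.run(transcribe_with_usage("audio.wav"))  # hypothetical file name
```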
