
Commit 8f423e5

[Feature][Response API] Add streaming support for non-harmony (vllm-project#23741)
Signed-off-by: Kebe <[email protected]>
1 parent 369a079 commit 8f423e5

File tree

3 files changed: +407 -77 lines changed


tests/v1/entrypoints/openai/responses/test_basic.py

Lines changed: 16 additions & 0 deletions
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import openai  # use the official client for correctness check
+import openai.types.responses as openai_responses_types
 import pytest
 
 
@@ -86,3 +87,18 @@ async def test_logprobs(client: openai.AsyncOpenAI):
     outputs = response.output
     assert outputs[-1].content[-1].logprobs
     assert len(outputs[-1].content[-1].logprobs[0].top_logprobs) == 5
+
+
+@pytest.mark.asyncio
+async def test_streaming(client: openai.AsyncOpenAI):
+    stream = await client.responses.create(
+        input="What is 13 * 24?",
+        stream=True,
+    )
+    events = [event async for event in stream]
+    assert isinstance(events[0], openai_responses_types.ResponseCreatedEvent)
+    assert any(
+        isinstance(event, openai_responses_types.ResponseTextDeltaEvent)
+        for event in events)
+    assert isinstance(events[-1],
+                      openai_responses_types.ResponseCompletedEvent)
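
The new test exercises the end-to-end streaming path: the stream opens with a response.created event, emits text as response.output_text.delta events, and closes with response.completed. A minimal standalone client sketch (not part of the diff) that consumes this stream against a running vLLM server follows; the base_url and api_key are assumptions for illustration, and the request omits the model name and relies on the server's default served model, as the test does.

import asyncio

import openai


async def main() -> None:
    # Assumed local vLLM deployment; adjust base_url/api_key to yours.
    client = openai.AsyncOpenAI(base_url="http://localhost:8000/v1",
                                api_key="EMPTY")
    stream = await client.responses.create(
        input="What is 13 * 24?",
        stream=True,
    )
    async for event in stream:
        # Print incremental text; skip created/completed lifecycle events.
        if event.type == "response.output_text.delta":
            print(event.delta, end="", flush=True)
    print()


asyncio.run(main())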

vllm/entrypoints/context.py

Lines changed: 10 additions & 0 deletions
@@ -49,9 +49,19 @@ class SimpleContext(ConversationContext):
 
     def __init__(self):
         self.last_output = None
+        self.num_prompt_tokens = 0
+        self.num_output_tokens = 0
+        self.num_cached_tokens = 0
+        # todo num_reasoning_tokens is not implemented yet.
+        self.num_reasoning_tokens = 0
 
     def append_output(self, output) -> None:
         self.last_output = output
+        if not isinstance(output, RequestOutput):
+            raise ValueError("SimpleContext only supports RequestOutput.")
+        self.num_prompt_tokens = len(output.prompt_token_ids or [])
+        self.num_cached_tokens = output.num_cached_tokens or 0
+        self.num_output_tokens += len(output.outputs[0].token_ids or [])
 
     def need_builtin_tool_call(self) -> bool:
         return False
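
The counters added here supply the usage accounting for the final streamed response: prompt and cached token counts are absolute per request and are overwritten on each chunk, while output tokens arrive incrementally and accumulate, consistent with delta-style outputs. The sketch below mirrors that accounting so the accumulation behavior can be verified in isolation; FakeCompletion and FakeOutput are hypothetical stand-ins for vllm.outputs.RequestOutput, not vLLM code.

from dataclasses import dataclass, field


@dataclass
class FakeCompletion:
    token_ids: list[int]


@dataclass
class FakeOutput:
    prompt_token_ids: list[int]
    num_cached_tokens: int
    outputs: list[FakeCompletion] = field(default_factory=list)


class UsageTracker:
    """Mirrors the accounting logic added to SimpleContext.append_output."""

    def __init__(self) -> None:
        self.num_prompt_tokens = 0
        self.num_output_tokens = 0
        self.num_cached_tokens = 0

    def append_output(self, output: FakeOutput) -> None:
        # Prompt/cached counts are absolute, so they are overwritten;
        # output tokens arrive as deltas, so they accumulate.
        self.num_prompt_tokens = len(output.prompt_token_ids or [])
        self.num_cached_tokens = output.num_cached_tokens or 0
        self.num_output_tokens += len(output.outputs[0].token_ids or [])


tracker = UsageTracker()
tracker.append_output(FakeOutput([1, 2, 3], 0, [FakeCompletion([10, 11])]))
tracker.append_output(FakeOutput([1, 2, 3], 0, [FakeCompletion([12])]))
assert tracker.num_prompt_tokens == 3
assert tracker.num_output_tokens == 3  # 2 + 1 accumulated across chunks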
