Skip to content

Commit 5db1870

Browse files
qandrew and Andrew Xia authored
[gpt-oss] use vLLM instead of openai types for streaming (#25186)
Signed-off-by: Andrew Xia <[email protected]> Signed-off-by: Andrew Xia <[email protected]> Co-authored-by: Andrew Xia <[email protected]>
1 parent 2ce26b9 commit 5db1870

File tree

3 files changed

+59
-22
lines changed

3 files changed

+59
-22
lines changed

tests/entrypoints/openai/test_response_api_with_harmony.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,14 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool):
379379
if event.type == "response.created":
380380
resp_id = event.response.id
381381

382+
# test vllm custom types are in the response
383+
if event.type in [
384+
"response.completed", "response.in_progress",
385+
"response.created"
386+
]:
387+
assert 'input_messages' in event.response.model_extra
388+
assert 'output_messages' in event.response.model_extra
389+
382390
if current_event_mode != event.type:
383391
current_event_mode = event.type
384392
print(f"\n[{event.type}] ", end="", flush=True)

vllm/entrypoints/openai/protocol.py

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,32 @@
1717
ChatCompletionAudio as OpenAIChatCompletionAudio)
1818
from openai.types.chat.chat_completion_message import (
1919
Annotation as OpenAIAnnotation)
20-
# yapf: enable
2120
from openai.types.responses import (
2221
ResponseCodeInterpreterCallCodeDeltaEvent,
2322
ResponseCodeInterpreterCallCodeDoneEvent,
2423
ResponseCodeInterpreterCallCompletedEvent,
2524
ResponseCodeInterpreterCallInProgressEvent,
26-
ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent,
27-
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
28-
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent,
29-
ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent,
30-
ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem,
31-
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
32-
ResponseStatus, ResponseWebSearchCallCompletedEvent,
33-
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent)
25+
ResponseCodeInterpreterCallInterpretingEvent)
26+
from openai.types.responses import (
27+
ResponseCompletedEvent as OpenAIResponseCompletedEvent)
28+
from openai.types.responses import (ResponseContentPartAddedEvent,
29+
ResponseContentPartDoneEvent)
30+
from openai.types.responses import (
31+
ResponseCreatedEvent as OpenAIResponseCreatedEvent)
32+
from openai.types.responses import ResponseFunctionToolCall
33+
from openai.types.responses import (
34+
ResponseInProgressEvent as OpenAIResponseInProgressEvent)
35+
from openai.types.responses import (ResponseInputItemParam, ResponseOutputItem,
36+
ResponseOutputItemAddedEvent,
37+
ResponseOutputItemDoneEvent,
38+
ResponsePrompt, ResponseReasoningItem,
39+
ResponseReasoningTextDeltaEvent,
40+
ResponseReasoningTextDoneEvent,
41+
ResponseStatus,
42+
ResponseWebSearchCallCompletedEvent,
43+
ResponseWebSearchCallInProgressEvent,
44+
ResponseWebSearchCallSearchingEvent)
45+
# yapf: enable
3446
from openai.types.responses.response_reasoning_item import (
3547
Content as ResponseReasoningTextContent)
3648

@@ -2077,10 +2089,24 @@ class ResponseReasoningPartAddedEvent(OpenAIBaseModel):
20772089
"""The type of the event. Always `response.reasoning_part.added`."""
20782090

20792091

2092+
# vLLM Streaming Events
2093+
# Note: we override the response type with the vLLM ResponsesResponse type
2094+
class ResponseCompletedEvent(OpenAIResponseCompletedEvent):
2095+
response: ResponsesResponse # type: ignore[override]
2096+
2097+
2098+
class ResponseCreatedEvent(OpenAIResponseCreatedEvent):
2099+
response: ResponsesResponse # type: ignore[override]
2100+
2101+
2102+
class ResponseInProgressEvent(OpenAIResponseInProgressEvent):
2103+
response: ResponsesResponse # type: ignore[override]
2104+
2105+
20802106
StreamingResponsesResponse: TypeAlias = Union[
2081-
ResponseCreatedEvent,
2082-
ResponseInProgressEvent,
2083-
ResponseCompletedEvent,
2107+
"ResponseCreatedEvent",
2108+
"ResponseInProgressEvent",
2109+
"ResponseCompletedEvent",
20842110
ResponseOutputItemAddedEvent,
20852111
ResponseOutputItemDoneEvent,
20862112
ResponseContentPartAddedEvent,

vllm/entrypoints/openai/serving_responses.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@
2222
ResponseCodeInterpreterCallCompletedEvent,
2323
ResponseCodeInterpreterCallInProgressEvent,
2424
ResponseCodeInterpreterCallInterpretingEvent,
25-
ResponseCodeInterpreterToolCallParam, ResponseCompletedEvent,
26-
ResponseContentPartAddedEvent, ResponseContentPartDoneEvent,
27-
ResponseCreatedEvent, ResponseFunctionToolCall, ResponseFunctionWebSearch,
28-
ResponseInProgressEvent, ResponseOutputItem, ResponseOutputItemAddedEvent,
29-
ResponseOutputItemDoneEvent, ResponseOutputMessage, ResponseOutputText,
30-
ResponseReasoningItem, ResponseReasoningTextDeltaEvent,
31-
ResponseReasoningTextDoneEvent, ResponseStatus, ResponseTextDeltaEvent,
32-
ResponseTextDoneEvent, ResponseWebSearchCallCompletedEvent,
33-
ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent,
34-
response_function_web_search, response_text_delta_event)
25+
ResponseCodeInterpreterToolCallParam, ResponseContentPartAddedEvent,
26+
ResponseContentPartDoneEvent, ResponseFunctionToolCall,
27+
ResponseFunctionWebSearch, ResponseOutputItem,
28+
ResponseOutputItemAddedEvent, ResponseOutputItemDoneEvent,
29+
ResponseOutputMessage, ResponseOutputText, ResponseReasoningItem,
30+
ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent,
31+
ResponseStatus, ResponseTextDeltaEvent, ResponseTextDoneEvent,
32+
ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent,
33+
ResponseWebSearchCallSearchingEvent, response_function_web_search,
34+
response_text_delta_event)
3535
from openai.types.responses.response_output_text import (Logprob,
3636
LogprobTopLogprob)
3737
# yapf: enable
@@ -58,6 +58,9 @@
5858
InputTokensDetails,
5959
OutputTokensDetails,
6060
RequestResponseMetadata,
61+
ResponseCompletedEvent,
62+
ResponseCreatedEvent,
63+
ResponseInProgressEvent,
6164
ResponseReasoningPartAddedEvent,
6265
ResponseReasoningPartDoneEvent,
6366
ResponsesRequest,

0 commit comments

Comments
 (0)