
Commit ad344ef

[gpt-oss] Small bug fixes for frontend (#22512)
Signed-off-by: Chen Zhang <[email protected]>
1 parent bbaf9e9 commit ad344ef

5 files changed: +77 -33 lines changed

vllm/entrypoints/context.py

Lines changed: 42 additions & 14 deletions
@@ -1,15 +1,20 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import json
 import logging
 from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Union

-from openai_harmony import Message, Role, StreamState
+from openai_harmony import Author, Message, Role, StreamState, TextContent

 from vllm.entrypoints.harmony_utils import (
     get_encoding, get_streamable_parser_for_assistant, render_for_completion)
 from vllm.entrypoints.tool import Tool
 from vllm.outputs import RequestOutput

+if TYPE_CHECKING:
+    from mcp.client import ClientSession
+
 logger = logging.getLogger(__name__)


@@ -71,6 +76,7 @@ def __init__(
     def append_output(self, output) -> None:
         if isinstance(output, RequestOutput):
             output_token_ids = output.outputs[0].token_ids
+            self.parser = get_streamable_parser_for_assistant()
             for token_id in output_token_ids:
                 self.parser.process(token_id)
             output_msgs = self.parser.messages
@@ -106,19 +112,41 @@ async def call_tool(self) -> list[Message]:
     def render_for_completion(self) -> list[int]:
         return render_for_completion(self.messages)

-    async def call_search_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
-
-    async def call_python_tool(
-        self,
-        tool_session: Tool,
-        last_msg: Message,
-    ) -> list[Message]:
-        return await tool_session.get_result(self)
+    async def call_search_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        tool_name = last_msg.recipient.split(".")[1]
+        args = json.loads(last_msg.content[0].text)
+        result = await tool_session.call_tool(tool_name, args)
+        result_str = result.content[0].text
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name=last_msg.recipient)
+        return [
+            Message(author=author, content=[content], recipient=Role.ASSISTANT)
+        ]
+
+    async def call_python_tool(self, tool_session: Union["ClientSession",
+                                                         Tool],
+                               last_msg: Message) -> list[Message]:
+        if isinstance(tool_session, Tool):
+            return await tool_session.get_result(self)
+        param = {
+            "code": last_msg.content[0].text,
+        }
+        result = await tool_session.call_tool("python", param)
+        result_str = result.content[0].text
+
+        content = TextContent(text=result_str)
+        author = Author(role=Role.TOOL, name="python")
+
+        return [
+            Message(author=author,
+                    content=[content],
+                    channel=last_msg.channel,
+                    recipient=Role.ASSISTANT)
+        ]


 class StreamingHarmonyContext(HarmonyContext):
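
The notable change in this file is that call_search_tool and call_python_tool now accept either a local Tool or an MCP ClientSession and dispatch on the runtime type. A minimal, self-contained sketch of that dispatch, with stub classes standing in for the real Tool and mcp.client.ClientSession:

import asyncio

class FakeTool:  # stand-in for vllm.entrypoints.tool.Tool
    async def get_result(self, ctx):
        return ["local tool result"]

class FakeClientSession:  # stand-in for mcp.client.ClientSession
    async def call_tool(self, name, args):
        return f"mcp server ran {name} with {args}"

async def call_python_tool(tool_session, code: str):
    # local tools keep the old get_result path; MCP sessions get call_tool
    if isinstance(tool_session, FakeTool):
        return await tool_session.get_result(None)
    return await tool_session.call_tool("python", {"code": code})

print(asyncio.run(call_python_tool(FakeTool(), "1 + 1")))
print(asyncio.run(call_python_tool(FakeClientSession(), "1 + 1")))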

vllm/entrypoints/openai/protocol.py

Lines changed: 3 additions & 2 deletions
@@ -19,8 +19,8 @@
 # yapf: enable
 from openai.types.responses import (ResponseFunctionToolCall,
                                     ResponseInputItemParam, ResponseOutputItem,
-                                    ResponsePrompt, ResponseStatus,
-                                    ResponseTextConfig)
+                                    ResponsePrompt, ResponseReasoningItem,
+                                    ResponseStatus, ResponseTextConfig)
 from openai.types.responses.response import ToolChoice
 from openai.types.responses.tool import Tool
 from openai.types.shared import Metadata, Reasoning
@@ -239,6 +239,7 @@ def get_logits_processors(processors: Optional[LogitsProcessors],


 ResponseInputOutputItem: TypeAlias = Union[ResponseInputItemParam,
+                                           ResponseReasoningItem,
                                            ResponseFunctionToolCall]

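Adding ResponseReasoningItem to the ResponseInputOutputItem alias lets a client echo reasoning items from a previous response back as request input. A small illustrative construction (the id is hypothetical; fields follow the openai-python Responses types):

from openai.types.responses import ResponseReasoningItem

item = ResponseReasoningItem(
    id="rs_123",        # hypothetical item id
    type="reasoning",
    summary=[],         # filled in by the server on real responses
)
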
vllm/entrypoints/openai/serving_responses.py

Lines changed: 16 additions & 13 deletions
@@ -16,8 +16,7 @@
 from openai import BaseModel
 # yapf conflicts with isort for this block
 # yapf: disable
-from openai.types.responses import (ResponseContentPartDoneEvent,
-                                    ResponseCreatedEvent,
+from openai.types.responses import (ResponseCreatedEvent,
                                     ResponseFunctionToolCall,
                                     ResponseInProgressEvent,
                                     ResponseOutputItem,
@@ -54,7 +53,7 @@
 # yapf: enable
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.entrypoints.tool_server import ToolServer
+from vllm.entrypoints.tool_server import MCPToolServer, ToolServer
 from vllm.inputs.data import TokensPrompt as EngineTokensPrompt
 from vllm.logger import init_logger
 from vllm.outputs import CompletionOutput
@@ -238,6 +237,15 @@ async def create_responses(
         if raw_request:
             raw_request.state.request_metadata = request_metadata

+        if self.tool_server is not None and isinstance(
+                self.tool_server, MCPToolServer
+        ) and (request.background or request.stream) and request.tools and any(
+                tool.type in ["web_search_preview", "code_interpreter"]
+                for tool in request.tools):
+            return self.create_error_response(
+                "MCP tool server is not supported in background mode and "
+                "streaming mode")
+
         # Schedule the request and get the result generator.
         generators: list[AsyncGenerator[ConversationContext, None]] = []

@@ -844,9 +852,13 @@ def _send_event(event: BaseModel):
                     type="reasoning",
                     content=[
                         ResponseReasoningTextContent(
-                            text=previous_item.content[0].text),
+                            text=previous_item.content[0].text,
+                            type="reasoning_text",
+                        ),
                     ],
                     status="completed",
+                    id=current_item_id,
+                    summary=[],
                 )
                 yield _send_event(
                     ResponseReasoningTextDoneEvent(
@@ -857,15 +869,6 @@ def _send_event(event: BaseModel):
                         content_index=current_content_index,
                         text=previous_item.content[0].text,
                     ))
-                yield _send_event(
-                    ResponseContentPartDoneEvent(
-                        type="response.content_part.done",
-                        item_id=current_item_id,
-                        sequence_number=-1,
-                        output_index=current_output_index,
-                        content_index=current_content_index,
-                        part=reasoning_item,
-                    ))
                 yield _send_event(
                     ResponseOutputItemDoneEvent(
                         type="response.output_item.done",
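
The new early-exit guard rejects background or streaming requests that would need MCP built-in tools, since those cannot be served in that mode. A minimal sketch of the same predicate with stand-in request objects (names here are illustrative, not the vLLM API):

from dataclasses import dataclass, field

@dataclass
class FakeTool:
    type: str

@dataclass
class FakeRequest:
    background: bool = False
    stream: bool = False
    tools: list = field(default_factory=list)

def should_reject(request, tool_server_is_mcp: bool) -> bool:
    # mirrors the guard: MCP server + background/stream + built-in tools
    builtin = ("web_search_preview", "code_interpreter")
    return (tool_server_is_mcp
            and (request.background or request.stream)
            and bool(request.tools)
            and any(t.type in builtin for t in request.tools))

req = FakeRequest(stream=True, tools=[FakeTool("code_interpreter")])
assert should_reject(req, tool_server_is_mcp=True)
assert not should_reject(req, tool_server_is_mcp=False)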

vllm/entrypoints/tool.py

Lines changed: 13 additions & 2 deletions
@@ -2,7 +2,9 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import os
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, Optional
+
+from openai_harmony import Message

 from vllm.logger import init_logger

@@ -70,7 +72,16 @@ def __init__(self):
                 "gpt_oss is not installed, code interpreter is disabled")
             return

-        self.python_tool = PythonTool()
+        # NOTE (Chen): as of gpt-oss 0.0.2, there is a bug in _make_response
+        # and we do the following monkey patch to fix it.
+        class PatchedGptOssPythonTool(PythonTool):
+
+            def _make_response(self,
+                               output: str,
+                               channel: Optional[str] = None) -> Message:
+                return super()._make_response(output)
+
+        self.python_tool = PatchedGptOssPythonTool()
         logger.info_once("Code interpreter tool initialized")

     async def get_result(self, context: "ConversationContext") -> Any:
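
The patch works by absorbing an extra channel keyword that the buggy gpt-oss 0.0.2 _make_response path mishandles, then delegating with only the supported arguments. A standalone sketch of the pattern (class names here are illustrative, not the gpt_oss API):

from typing import Optional

class Base:
    def _make_response(self, output: str) -> str:
        return f"response:{output}"

class Patched(Base):
    def _make_response(self, output: str,
                       channel: Optional[str] = None) -> str:
        # absorb the extra `channel` kwarg and delegate to the original
        return super()._make_response(output)

assert Patched()._make_response("ok", channel="final") == "response:ok"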

vllm/entrypoints/tool_server.py

Lines changed: 3 additions & 2 deletions
@@ -4,7 +4,7 @@
 from contextlib import AbstractAsyncContextManager, asynccontextmanager
 from typing import TYPE_CHECKING, Any, Optional

-from openai_harmony import ToolNamespaceConfig
+from openai_harmony import ToolDescription, ToolNamespaceConfig

 from vllm.entrypoints.tool import HarmonyBrowserTool, HarmonyPythonTool, Tool
 from vllm.logger import init_logger
@@ -105,7 +105,6 @@ def __init__(self):
         self.harmony_tool_descriptions = {}

     async def add_tool_server(self, server_url: str):
-        from mcp.types import ToolDescription
         tool_urls = server_url.split(",")
         self.harmony_tool_descriptions = {}
         self.urls: dict[str, str] = {}
@@ -133,6 +132,8 @@ async def add_tool_server(self, server_url: str):
                 logger.warning(
                     "Tool %s already exists. Ignoring duplicate tool server %s",
                     tool_from_mcp.name, url)
+        logger.info("MCPToolServer initialized with tools: %s",
+                    list(self.harmony_tool_descriptions.keys()))

     def has_tool(self, tool_name: str):
         return tool_name in self.harmony_tool_descriptions
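
add_tool_server accepts a comma-separated list of server URLs and keeps the first registration of each tool name; the new log line makes the final tool set visible at startup. A self-contained sketch of that split-and-dedupe loop (URLs and tool lists here are made up):

def fake_list_tools(url: str) -> list[str]:
    # stand-in for querying an MCP server for its tools
    return {"http://a:8000": ["browser", "python"],
            "http://b:8000": ["python"]}[url]

descriptions: dict[str, str] = {}
for url in "http://a:8000,http://b:8000".split(","):
    for name in fake_list_tools(url):
        if name not in descriptions:
            descriptions[name] = url  # first registration wins
        else:
            print(f"Tool {name} already exists. Ignoring duplicate {url}")
print("MCPToolServer initialized with tools:", list(descriptions))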
