Skip to content

Commit 5d18bf8

Browse files
authored
[Bugfix] Fix Harmony preamble visibility in Responses API (vllm-project#32114)
Signed-off-by: Pushkar Patel <git@thepushkarp.com> Signed-off-by: pupa <pupa@users.noreply.github.com>
1 parent 0788ff0 commit 5d18bf8

File tree

10 files changed

+341
-63
lines changed

10 files changed

+341
-63
lines changed

tests/entrypoints/openai/parser/test_harmony_utils.py

Lines changed: 109 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,19 @@
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

44
import pytest
5-
from openai.types.responses import ResponseFunctionToolCall, ResponseReasoningItem
5+
from openai.types.responses import (
6+
ResponseFunctionToolCall,
7+
ResponseOutputMessage,
8+
ResponseReasoningItem,
9+
)
610
from openai.types.responses.response_output_item import McpCall
711
from openai_harmony import Author, Message, Role, TextContent
812

913
from tests.entrypoints.openai.utils import verify_harmony_messages
1014
from vllm.entrypoints.openai.parser.harmony_utils import (
1115
auto_drop_analysis_messages,
1216
get_encoding,
17+
get_system_message,
1318
has_custom_tools,
1419
parse_chat_input_to_harmony_message,
1520
parse_chat_output,
@@ -840,15 +845,58 @@ def test_parse_chat_output_complete_reasoning_and_content(self) -> None:
840845
assert reasoning == "I've thought hard about this."
841846
assert final_content == "The answer is 4."
842847

848+
def test_parse_chat_output_commentary_with_recipient_excluded(self) -> None:
849+
"""Commentary with a recipient (tool call) should not appear in
850+
final_content — those are handled separately by the tool parser.
851+
852+
The first message is a preamble (visible), the second is a tool
853+
call (excluded). Only the preamble should appear in final_content.
854+
"""
855+
harmony_str = (
856+
"<|channel|>commentary"
857+
"<|message|>Let me check the weather.<|end|>"
858+
"<|start|>assistant to=functions.get_weather"
859+
"<|channel|>commentary"
860+
'<|message|>{"location": "SF"}<|end|>'
861+
)
862+
token_ids = get_encoding().encode(harmony_str, allowed_special="all")
863+
reasoning, final_content, _ = parse_chat_output(token_ids)
864+
assert reasoning is None
865+
assert final_content == "Let me check the weather."
866+
867+
def test_parse_chat_output_interrupted_preamble(self) -> None:
868+
"""Partial/interrupted preamble (commentary without recipient) should
869+
appear in final_content, not reasoning."""
870+
harmony_str = "<|channel|>commentary<|message|>I'll search for that"
871+
token_ids = get_encoding().encode(harmony_str, allowed_special="all")
872+
reasoning, final_content, _ = parse_chat_output(token_ids)
873+
assert reasoning is None
874+
assert final_content == "I'll search for that"
875+
876+
def test_parse_chat_output_preamble_then_final(self) -> None:
877+
"""Preamble followed by a final message should both appear in
878+
final_content, joined by newline."""
879+
harmony_str = (
880+
"<|channel|>commentary"
881+
"<|message|>Let me look that up.<|end|>"
882+
"<|start|>assistant<|channel|>final"
883+
"<|message|>The answer is 42.<|end|>"
884+
)
885+
token_ids = get_encoding().encode(harmony_str, allowed_special="all")
886+
reasoning, final_content, _ = parse_chat_output(token_ids)
887+
assert reasoning is None
888+
assert final_content == "Let me look that up.\nThe answer is 42."
889+
843890

844891
class TestParseOutputMessage:
845892
"""Tests for parse_output_message function."""
846893

847-
def test_commentary_with_no_recipient_creates_reasoning(self):
848-
"""Test that commentary with recipient=None (preambles) creates reasoning items.
894+
def test_commentary_with_no_recipient_creates_message(self):
895+
"""Test that commentary with recipient=None (preambles) creates message items.
849896
850-
Per Harmony format, commentary channel can contain preambles to calling
851-
multiple functions - explanatory text with no recipient.
897+
Per Harmony format, preambles are intended to be shown to end-users,
898+
unlike analysis channel content which is hidden reasoning.
899+
See: https://cookbook.openai.com/articles/openai-harmony
852900
"""
853901
message = Message.from_role_and_content(
854902
Role.ASSISTANT, "I will now search for the weather information."
@@ -859,13 +907,16 @@ def test_commentary_with_no_recipient_creates_reasoning(self):
859907
output_items = parse_output_message(message)
860908

861909
assert len(output_items) == 1
862-
assert isinstance(output_items[0], ResponseReasoningItem)
863-
assert output_items[0].type == "reasoning"
910+
assert isinstance(output_items[0], ResponseOutputMessage)
911+
assert output_items[0].type == "message"
912+
assert output_items[0].role == "assistant"
913+
assert output_items[0].status == "completed"
914+
assert len(output_items[0].content) == 1
915+
assert output_items[0].content[0].type == "output_text"
864916
assert (
865917
output_items[0].content[0].text
866918
== "I will now search for the weather information."
867919
)
868-
assert output_items[0].content[0].type == "reasoning_text"
869920

870921
def test_commentary_with_function_recipient_creates_function_call(self):
871922
"""Test commentary with recipient='functions.X' creates function calls."""
@@ -944,7 +995,7 @@ def test_commentary_with_empty_content_and_no_recipient(self):
944995
output_items = parse_output_message(message)
945996

946997
assert len(output_items) == 1
947-
assert isinstance(output_items[0], ResponseReasoningItem)
998+
assert isinstance(output_items[0], ResponseOutputMessage)
948999
assert output_items[0].content[0].text == ""
9491000

9501001
def test_commentary_with_multiple_contents_and_no_recipient(self):
@@ -958,10 +1009,13 @@ def test_commentary_with_multiple_contents_and_no_recipient(self):
9581009

9591010
output_items = parse_output_message(message)
9601011

961-
assert len(output_items) == 2
962-
assert all(isinstance(item, ResponseReasoningItem) for item in output_items)
1012+
# _parse_final_message returns single ResponseOutputMessage with
1013+
# multiple contents
1014+
assert len(output_items) == 1
1015+
assert isinstance(output_items[0], ResponseOutputMessage)
1016+
assert len(output_items[0].content) == 2
9631017
assert output_items[0].content[0].text == "Step 1: Analyze the request"
964-
assert output_items[1].content[0].text == "Step 2: Prepare to call functions"
1018+
assert output_items[0].content[1].text == "Step 2: Prepare to call functions"
9651019

9661020
def test_commentary_with_multiple_function_calls(self):
9671021
"""Test multiple function calls in commentary channel."""
@@ -1133,7 +1187,7 @@ def test_parse_remaining_state_commentary_channel() -> None:
11331187
assert mcp_items[0].status == "in_progress"
11341188

11351189
# Test 3: Built-in tool (python)
1136-
# should NOT return MCP call, falls through to reasoning
1190+
# should NOT return MCP call, returns reasoning (internal tool interaction)
11371191
parser_builtin = Mock()
11381192
parser_builtin.current_content = "print('hello')"
11391193
parser_builtin.current_role = Role.ASSISTANT
@@ -1142,11 +1196,26 @@ def test_parse_remaining_state_commentary_channel() -> None:
11421196

11431197
builtin_items = parse_remaining_state(parser_builtin)
11441198

1145-
# Should fall through to reasoning logic
1199+
# Built-in tools explicitly return reasoning
11461200
assert len(builtin_items) == 1
11471201
assert not isinstance(builtin_items[0], McpCall)
11481202
assert builtin_items[0].type == "reasoning"
11491203

1204+
# Test 4: No recipient (preamble) → should return message, not reasoning
1205+
parser_preamble = Mock()
1206+
parser_preamble.current_content = "I'll search for that information now."
1207+
parser_preamble.current_role = Role.ASSISTANT
1208+
parser_preamble.current_channel = "commentary"
1209+
parser_preamble.current_recipient = None
1210+
1211+
preamble_items = parse_remaining_state(parser_preamble)
1212+
1213+
assert len(preamble_items) == 1
1214+
assert isinstance(preamble_items[0], ResponseOutputMessage)
1215+
assert preamble_items[0].type == "message"
1216+
assert preamble_items[0].content[0].text == "I'll search for that information now."
1217+
assert preamble_items[0].status == "incomplete" # streaming
1218+
11501219

11511220
def test_parse_remaining_state_analysis_channel() -> None:
11521221
"""Test parse_remaining_state with analysis channel and various recipients."""
@@ -1199,3 +1268,29 @@ def test_parse_remaining_state_analysis_channel() -> None:
11991268
assert len(builtin_items) == 1
12001269
assert not isinstance(builtin_items[0], McpCall)
12011270
assert builtin_items[0].type == "reasoning"
1271+
1272+
1273+
class TestGetSystemMessage:
1274+
"""Tests for get_system_message channel configuration."""
1275+
1276+
def test_commentary_channel_present_without_custom_tools(self) -> None:
1277+
"""Commentary channel must be valid even without custom tools."""
1278+
sys_msg = get_system_message(with_custom_tools=False)
1279+
valid_channels = sys_msg.content[0].channel_config.valid_channels
1280+
assert "commentary" in valid_channels
1281+
1282+
def test_commentary_channel_present_with_custom_tools(self) -> None:
1283+
"""Commentary channel present when custom tools are enabled."""
1284+
sys_msg = get_system_message(with_custom_tools=True)
1285+
valid_channels = sys_msg.content[0].channel_config.valid_channels
1286+
assert "commentary" in valid_channels
1287+
1288+
def test_all_standard_channels_present(self) -> None:
1289+
"""All three standard Harmony channels should always be valid."""
1290+
for with_tools in (True, False):
1291+
sys_msg = get_system_message(with_custom_tools=with_tools)
1292+
valid_channels = sys_msg.content[0].channel_config.valid_channels
1293+
for channel in ("analysis", "commentary", "final"):
1294+
assert channel in valid_channels, (
1295+
f"{channel} missing when with_custom_tools={with_tools}"
1296+
)

tests/entrypoints/openai/responses/test_harmony.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -712,15 +712,14 @@ async def test_function_calling_required(client: OpenAI, model_name: str):
712712
async def test_system_message_with_tools(client: OpenAI, model_name: str):
713713
from vllm.entrypoints.openai.parser.harmony_utils import get_system_message
714714

715-
# Test with custom tools enabled - commentary channel should be available
716-
sys_msg = get_system_message(with_custom_tools=True)
717-
valid_channels = sys_msg.content[0].channel_config.valid_channels
718-
assert "commentary" in valid_channels
719-
720-
# Test with custom tools disabled - commentary channel should be removed
721-
sys_msg = get_system_message(with_custom_tools=False)
722-
valid_channels = sys_msg.content[0].channel_config.valid_channels
723-
assert "commentary" not in valid_channels
715+
# Commentary channel should always be present (needed for preambles)
716+
# regardless of whether custom tools are enabled
717+
for with_tools in (True, False):
718+
sys_msg = get_system_message(with_custom_tools=with_tools)
719+
valid_channels = sys_msg.content[0].channel_config.valid_channels
720+
assert "commentary" in valid_channels, (
721+
f"commentary channel missing when with_custom_tools={with_tools}"
722+
)
724723

725724

726725
@pytest.mark.asyncio

tests/entrypoints/openai/responses/test_mcp_tools.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,13 @@ async def test_mcp_tool_env_flag_enabled(self, client: OpenAI, model_name: str):
172172
recipient = message.get("recipient")
173173
if recipient and recipient.startswith("python"):
174174
tool_call_found = True
175-
assert message.get("channel") == "analysis"
175+
assert message.get("channel") == "commentary"
176176
author = message.get("author", {})
177177
if author.get("role") == "tool" and (author.get("name") or "").startswith(
178178
"python"
179179
):
180180
tool_response_found = True
181-
assert message.get("channel") == "analysis"
181+
assert message.get("channel") == "commentary"
182182

183183
assert tool_call_found, (
184184
f"No Python tool call found. "

tests/entrypoints/openai/test_serving_chat_stream_harmony.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -180,20 +180,13 @@ def test_tool_call_index_from_previous_messages(self):
180180

181181
assert delta_message.tool_calls[0].index == 1
182182

183-
@pytest.mark.parametrize(
184-
"channel,recipient",
185-
[
186-
("commentary", None),
187-
("commentary", "browser.search"),
188-
],
189-
)
190-
def test_returns_tool_call_preambles(self, channel, recipient):
191-
"""Test that invalid tool recipient on commentary is treated as content."""
183+
def test_returns_preambles_as_content(self):
184+
"""Test that commentary with no recipient (preamble) is user content."""
192185
parser = MockStreamableParser()
193186
delta_text = "some text"
194187

195188
token_states = [
196-
TokenState(channel=channel, recipient=recipient, text=delta_text)
189+
TokenState(channel="commentary", recipient=None, text=delta_text)
197190
]
198191

199192
delta_message, tools_streamed = extract_harmony_streaming_delta(
@@ -211,6 +204,7 @@ def test_returns_tool_call_preambles(self, channel, recipient):
211204
[
212205
(None, None),
213206
("unknown_channel", None),
207+
("commentary", "browser.search"),
214208
],
215209
)
216210
def test_returns_none_for_invalid_inputs(self, channel, recipient):

tests/entrypoints/openai/test_serving_responses.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
_extract_allowed_tools_from_mcp_requests,
2727
extract_tool_types,
2828
)
29+
from vllm.entrypoints.openai.responses.streaming_events import (
30+
StreamingState,
31+
)
2932
from vllm.inputs.data import TokensPrompt
3033
from vllm.outputs import CompletionOutput, RequestOutput
3134
from vllm.sampling_params import SamplingParams
@@ -439,3 +442,115 @@ def test_extract_allowed_tools_filters_non_mcp(self):
439442
"server1": ["tool1"],
440443
"server2": ["tool2"],
441444
}
445+
446+
447+
class TestHarmonyPreambleStreaming:
448+
"""Tests for preamble (commentary with no recipient) streaming events."""
449+
450+
@staticmethod
451+
def _make_ctx(*, channel, recipient, delta="hello"):
452+
"""Build a lightweight mock StreamingHarmonyContext."""
453+
ctx = MagicMock()
454+
ctx.last_content_delta = delta
455+
ctx.parser.current_channel = channel
456+
ctx.parser.current_recipient = recipient
457+
return ctx
458+
459+
@staticmethod
460+
def _make_previous_item(*, channel, recipient, text="preamble text"):
461+
"""Build a lightweight mock previous_item (openai_harmony Message)."""
462+
content_part = MagicMock()
463+
content_part.text = text
464+
item = MagicMock()
465+
item.channel = channel
466+
item.recipient = recipient
467+
item.content = [content_part]
468+
return item
469+
470+
def test_preamble_delta_emits_text_events(self) -> None:
471+
"""commentary + recipient=None should emit output_text.delta events."""
472+
from vllm.entrypoints.openai.responses.streaming_events import (
473+
emit_content_delta_events,
474+
)
475+
476+
ctx = self._make_ctx(channel="commentary", recipient=None)
477+
state = StreamingState()
478+
479+
events = emit_content_delta_events(ctx, state)
480+
481+
type_names = [e.type for e in events]
482+
assert "response.output_text.delta" in type_names
483+
assert "response.output_item.added" in type_names
484+
485+
def test_preamble_delta_second_token_no_added(self) -> None:
486+
"""Second preamble token should emit delta only, not added again."""
487+
from vllm.entrypoints.openai.responses.streaming_events import (
488+
emit_content_delta_events,
489+
)
490+
491+
ctx = self._make_ctx(channel="commentary", recipient=None, delta="w")
492+
state = StreamingState()
493+
state.sent_output_item_added = True
494+
state.current_item_id = "msg_test"
495+
state.current_content_index = 0
496+
497+
events = emit_content_delta_events(ctx, state)
498+
499+
type_names = [e.type for e in events]
500+
assert "response.output_text.delta" in type_names
501+
assert "response.output_item.added" not in type_names
502+
503+
def test_commentary_with_function_recipient_not_preamble(self) -> None:
504+
"""commentary + recipient='functions.X' must NOT use preamble path."""
505+
from vllm.entrypoints.openai.responses.streaming_events import (
506+
emit_content_delta_events,
507+
)
508+
509+
ctx = self._make_ctx(
510+
channel="commentary",
511+
recipient="functions.get_weather",
512+
)
513+
state = StreamingState()
514+
515+
events = emit_content_delta_events(ctx, state)
516+
517+
type_names = [e.type for e in events]
518+
assert "response.output_text.delta" not in type_names
519+
520+
def test_preamble_done_emits_text_done_events(self) -> None:
521+
"""Completed preamble should emit text done + content_part done +
522+
output_item done, same shape as final channel."""
523+
from vllm.entrypoints.openai.responses.streaming_events import (
524+
emit_previous_item_done_events,
525+
)
526+
527+
previous = self._make_previous_item(channel="commentary", recipient=None)
528+
state = StreamingState()
529+
state.current_item_id = "msg_test"
530+
state.current_output_index = 0
531+
state.current_content_index = 0
532+
533+
events = emit_previous_item_done_events(previous, state)
534+
535+
type_names = [e.type for e in events]
536+
assert "response.output_text.done" in type_names
537+
assert "response.content_part.done" in type_names
538+
assert "response.output_item.done" in type_names
539+
540+
def test_commentary_with_recipient_no_preamble_done(self) -> None:
541+
"""commentary + recipient='functions.X' should route to function call
542+
done, not preamble done."""
543+
from vllm.entrypoints.openai.responses.streaming_events import (
544+
emit_previous_item_done_events,
545+
)
546+
547+
previous = self._make_previous_item(
548+
channel="commentary", recipient="functions.get_weather"
549+
)
550+
state = StreamingState()
551+
state.current_item_id = "fc_test"
552+
553+
events = emit_previous_item_done_events(previous, state)
554+
555+
type_names = [e.type for e in events]
556+
assert "response.output_text.done" not in type_names

0 commit comments

Comments (0)