Skip to content

Commit 42c3448

Browse files
committed
Handle multiple channels in one decoding stage
Signed-off-by: Aleksandr Samarin <[email protected]>
1 parent 0340f45 commit 42c3448

File tree

1 file changed

+92
-63
lines changed

1 file changed

+92
-63
lines changed

vllm/entrypoints/openai/serving_chat.py

Lines changed: 92 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -748,12 +748,18 @@ async def chat_completion_stream_generator(
748748
if self.use_harmony:
749749
harmony_parser = harmony_parsers[i]
750750
prev_recipient = harmony_parser.current_recipient
751-
delta_text = ""
751+
752+
# Track accumulated content per token with their state
753+
token_states = []
752754
for token_id in output.token_ids:
753755
harmony_parser.process(token_id)
754-
delta_text += harmony_parser.last_content_delta or ""
755-
cur_channel = harmony_parser.current_channel
756-
cur_recipient = harmony_parser.current_recipient
756+
token_delta = harmony_parser.last_content_delta or ""
757+
token_states.append((
758+
harmony_parser.current_channel,
759+
harmony_parser.current_recipient,
760+
token_delta
761+
))
762+
delta_text = "".join(state[2] for state in token_states)
757763
else:
758764
delta_text = output.text
759765

@@ -783,61 +789,80 @@ async def chat_completion_stream_generator(
783789
current_token_ids = as_list(output.token_ids)
784790

785791
if self.use_harmony:
786-
if cur_channel == "final":
787-
delta_message = DeltaMessage(content=delta_text)
788-
elif cur_channel == "analysis":
789-
if request.include_reasoning:
790-
delta_message = DeltaMessage(
791-
reasoning_content=delta_text
792-
)
793-
else:
794-
delta_message = None
795-
elif (
796-
cur_channel == "commentary"
797-
and cur_recipient
798-
and cur_recipient.startswith("functions.")
799-
):
800-
# Count completed tool calls to determine index
801-
base_index = 0
802-
for msg in harmony_parser.messages:
803-
if (
804-
msg.channel == "commentary"
805-
and msg.recipient
806-
and msg.recipient.startswith("functions.")
807-
):
808-
base_index += 1
809-
810-
if prev_recipient != cur_recipient:
811-
tool_name = cur_recipient.split("functions.", 1)[1]
812-
delta_message = DeltaMessage(
813-
tool_calls=[
814-
DeltaToolCall(
815-
id=make_tool_call_id(),
816-
type="function",
817-
function=DeltaFunctionCall(
818-
name=tool_name,
819-
arguments="",
820-
),
821-
index=base_index,
822-
)
823-
]
824-
)
825-
elif delta_text:
826-
delta_message = DeltaMessage(
827-
tool_calls=[
828-
DeltaToolCall(
829-
index=base_index,
830-
function=DeltaFunctionCall(
831-
arguments=delta_text
832-
),
833-
)
834-
]
835-
)
792+
# Group consecutive tokens with same channel/recipient
793+
groups = []
794+
for channel, recipient, text in token_states:
795+
if not text:
796+
continue
797+
if groups and groups[-1]['channel'] == channel and groups[-1]['recipient'] == recipient:
798+
groups[-1]['text'] += text
836799
else:
837-
delta_message = None
800+
groups.append({
801+
'channel': channel,
802+
'recipient': recipient,
803+
'text': text
804+
})
838805

839-
if delta_message is not None:
806+
# Process each group and create delta messages
807+
delta_message = None
808+
combined_content = ""
809+
combined_reasoning = ""
810+
tool_messages = []
811+
812+
for group in groups:
813+
group_channel = group['channel']
814+
group_recipient = group['recipient']
815+
group_text = group['text']
816+
817+
if group_channel == "final":
818+
combined_content += group_text
819+
elif group_channel == "analysis":
820+
if request.include_reasoning:
821+
combined_reasoning += group_text
822+
elif (group_channel == "commentary" and group_recipient
823+
and group_recipient.startswith("functions.")):
824+
825+
base_index = 0
826+
for msg in harmony_parser.messages:
827+
if (msg.channel == "commentary"
828+
and msg.recipient
829+
and msg.recipient.startswith(
830+
"functions.")):
831+
base_index += 1
832+
833+
if prev_recipient != group_recipient:
834+
tool_name = group_recipient.split(
835+
"functions.", 1)[1]
836+
tool_messages.append(DeltaToolCall(
837+
id=make_tool_call_id(),
838+
type="function",
839+
function=DeltaFunctionCall(
840+
name=tool_name,
841+
arguments="",
842+
),
843+
index=base_index,
844+
))
845+
prev_recipient = group_recipient
846+
847+
if group_text:
848+
tool_messages.append(DeltaToolCall(
849+
index=base_index,
850+
function=DeltaFunctionCall(
851+
arguments=group_text),
852+
))
853+
854+
# Combine all non-empty fields into a single message
855+
if combined_content or combined_reasoning or tool_messages:
856+
delta_kwargs = {}
857+
if combined_content:
858+
delta_kwargs['content'] = combined_content
859+
if combined_reasoning:
860+
delta_kwargs['reasoning_content'] = combined_reasoning
861+
if tool_messages:
862+
delta_kwargs['tool_calls'] = tool_messages
840863
harmony_tools_streamed[i] = True
864+
865+
delta_message = DeltaMessage(**delta_kwargs)
841866
else:
842867
delta_message = None
843868
# handle streaming deltas for tools with named tool_choice
@@ -1076,17 +1101,21 @@ async def chat_completion_stream_generator(
10761101

10771102
# Log streaming delta if output logging is enabled
10781103
if self.enable_log_outputs and self.request_logger:
1079-
delta_content = ""
1104+
delta_content_parts = []
10801105
if delta_message.content:
1081-
delta_content = delta_message.content
1082-
elif delta_message.tool_calls:
1083-
delta_content = "".join(
1106+
delta_content_parts.append(delta_message.content)
1107+
if delta_message.reasoning_content:
1108+
delta_content_parts.append(f"[reasoning: {delta_message.reasoning_content}]")
1109+
if delta_message.tool_calls:
1110+
tool_args = "".join(
10841111
tc.function.arguments
10851112
for tc in delta_message.tool_calls
1086-
if tc.function and tc.function.arguments
1087-
)
1113+
if tc.function and tc.function.arguments)
1114+
if tool_args:
1115+
delta_content_parts.append(f"[tool_calls: {tool_args}]")
10881116

1089-
if delta_content:
1117+
if delta_content_parts:
1118+
delta_content = " ".join(delta_content_parts)
10901119
self.request_logger.log_outputs(
10911120
request_id=request_id,
10921121
outputs=delta_content,

0 commit comments

Comments
 (0)