6
6
import time
7
7
from collections .abc import AsyncGenerator , AsyncIterator
8
8
from collections .abc import Sequence as GenericSequence
9
- from typing import Callable , Final , Optional , Union
9
+ from typing import Any , Callable , Final , Optional , Union
10
10
11
11
import jinja2
12
12
import partial_json_parser
@@ -790,11 +790,10 @@ async def chat_completion_stream_generator(
790
790
791
791
if self .use_harmony :
792
792
# Group consecutive tokens with same channel/recipient
793
- groups = []
793
+ groups : list [ dict [ str , str ]] = []
794
794
for channel , recipient , text in token_states :
795
- if not text :
796
- continue
797
- if groups and groups [- 1 ]['channel' ] == channel and groups [- 1 ]['recipient' ] == recipient :
795
+ if (groups and groups [- 1 ]['channel' ] == channel
796
+ and groups [- 1 ]['recipient' ] == recipient ):
798
797
groups [- 1 ]['text' ] += text
799
798
else :
800
799
groups .append ({
@@ -849,14 +848,16 @@ async def chat_completion_stream_generator(
849
848
index = next_tool_index ,
850
849
))
851
850
prev_recipient = group_recipient
852
- # Increment for any subsequent new tool calls in this chunk
851
+ # Increment for subsequent new tool calls
853
852
next_tool_index += 1
854
853
855
854
if group_text :
856
855
# Stream arguments for the ongoing tool call
857
- # The current call index is next_tool_index - 1 if we just
858
- # opened it, OR base_index if continuing from prev chunk
859
- tool_call_index = next_tool_index - 1 if next_tool_index > base_index else base_index
856
+ # Use next_tool_index - 1 if we opened a call
857
+ # this chunk, else base_index for ongoing
858
+ tool_call_index = (next_tool_index - 1
859
+ if next_tool_index > base_index
860
+ else base_index )
860
861
tool_messages .append (DeltaToolCall (
861
862
index = tool_call_index ,
862
863
function = DeltaFunctionCall (
@@ -865,7 +866,7 @@ async def chat_completion_stream_generator(
865
866
866
867
# Combine all non-empty fields into a single message
867
868
if combined_content or combined_reasoning or tool_messages :
868
- delta_kwargs = {}
869
+ delta_kwargs : dict [ str , Any ] = {}
869
870
if combined_content :
870
871
delta_kwargs ['content' ] = combined_content
871
872
if combined_reasoning :
@@ -1117,7 +1118,8 @@ async def chat_completion_stream_generator(
1117
1118
if delta_message .content :
1118
1119
delta_content_parts .append (delta_message .content )
1119
1120
if delta_message .reasoning_content :
1120
- delta_content_parts .append (f"[reasoning: { delta_message .reasoning_content } ]" )
1121
+ reasoning = delta_message .reasoning_content
1122
+ delta_content_parts .append (f"[reasoning: { reasoning } ]" )
1121
1123
if delta_message .tool_calls :
1122
1124
tool_args = "" .join (
1123
1125
tc .function .arguments
0 commit comments