@@ -16,7 +16,8 @@
 from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
-                                         ConversationMessage)
+                                         ConversationMessage,
+                                         random_tool_call_id)
 from vllm.entrypoints.logger import RequestLogger
 from vllm.entrypoints.openai.protocol import (
     ChatCompletionLogProb, ChatCompletionLogProbs,
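The newly imported helper, `random_tool_call_id`, is what stamps each streamed tool call with a unique OpenAI-style id. For context, a minimal sketch of what such a helper looks like — the `chatcmpl-tool-` prefix and the use of `random_uuid` are assumptions for illustration, not part of this diff:

```python
# Sketch of the imported helper (assumed implementation, not from this diff):
# wraps vllm's random_uuid with an OpenAI-compatible prefix.
from vllm.utils import random_uuid

def random_tool_call_id() -> str:
    return f"chatcmpl-tool-{random_uuid()}"
```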
@@ -363,9 +364,10 @@ def extract_tool_call_required_streaming(
 
                     function_name_returned = True
                     delta_message = DeltaMessage(tool_calls=[
-                        DeltaToolCall(function=DeltaFunctionCall(
-                            name=current_tool_call["name"],
-                            arguments=arguments),
+                        DeltaToolCall(id=random_tool_call_id(),
+                                      function=DeltaFunctionCall(
+                                          name=current_tool_call["name"],
+                                          arguments=arguments),
                             index=len(obj) - 1,
                             type="function")
                     ])
@@ -382,8 +384,7 @@ def extract_tool_call_required_streaming(
                             # instead of name every time
                             name=None,
                             arguments=delta_text),
-                        index=len(obj) - 1,
-                        type="function")
+                        index=len(obj) - 1)
                     ])
             else:
                 delta_message = None
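Taken together, the two hunks above make the `tool_choice="required"` streaming path emit a tool call's `id`, `type`, and function `name` exactly once, in its first delta, with later deltas carrying only argument fragments. Illustratively (all field values invented), the emitted deltas now have this shape:

```python
# First delta for a tool call: carries id, type, and function name.
first_delta = {
    "index": 0,
    "id": "chatcmpl-tool-...",  # from random_tool_call_id()
    "type": "function",
    "function": {"name": "get_weather", "arguments": ""},
}
# Subsequent deltas: only the argument fragment, keyed by index.
later_delta = {
    "index": 0,
    "function": {"arguments": '{"city": "Par'},
}
```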
@@ -422,7 +423,7 @@ async def chat_completion_stream_generator(
                  and self._should_stream_with_auto_tool_parsing(request))
 
         all_previous_token_ids: Optional[list[list[int]]]
-        function_name_returned: Optional[list[bool]] = None
+        function_name_returned = [False] * num_choices
 
         # Only one of these will be used, thus previous_texts and
         # all_previous_token_ids will not be used twice in the same iteration.
@@ -435,7 +436,6 @@ async def chat_completion_stream_generator(
             reasoning_end_arr = [False] * num_choices
         elif request.tool_choice == "required":
             previous_texts = [""] * num_choices
-            function_name_returned = [False] * num_choices
             all_previous_token_ids = None
         else:
             previous_texts, all_previous_token_ids = None, None
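Hoisting `function_name_returned` out of the `required` branch and initializing it to `[False] * num_choices` unconditionally lets the named-`tool_choice` path (next hunk) reuse the same per-choice flags, instead of the list existing only in the `required` case and being `None` otherwise.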
@@ -623,16 +623,27 @@ async def chat_completion_stream_generator(
                             delta_text = previous_text + delta_text
                             current_text = ""
 
+                        if function_name_returned[i]:
+                            delta_tool_call = DeltaToolCall(
+                                function=DeltaFunctionCall(
+                                    arguments=delta_text),
+                                index=i)
+                        else:
+                            delta_tool_call = DeltaToolCall(
+                                id=random_tool_call_id(),
+                                type="function",
+                                function=DeltaFunctionCall(
+                                    name=tool_choice_function_name,
+                                    arguments=delta_text),
+                                index=i)
+                            function_name_returned[i] = True
+
                         delta_message = DeltaMessage(tool_calls=[
-                            DeltaToolCall(function=DeltaFunctionCall(
-                                name=tool_choice_function_name,
-                                arguments=delta_text),
-                                index=i)
+                            delta_tool_call,
                         ])
 
                     elif request.tool_choice == "required":
                         assert previous_texts is not None
-                        assert function_name_returned is not None
                         previous_text = previous_texts[i]
                         current_text = previous_text + delta_text
                         fn_name_returned = function_name_returned[i]
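With this hunk, the named-function path mirrors the `required` path: the first chunk per choice carries `id`/`type`/`name`, later chunks only arguments. That matters because OpenAI-style clients accumulate tool calls keyed on `index` and read `id` and `name` from the first chunk only. A hedged sketch of such client-side reassembly (not vllm code):

```python
# Sketch: reassembling a streamed tool call on the client side.
tool_calls: dict[int, dict] = {}

def apply_delta(delta: dict) -> None:
    call = tool_calls.setdefault(
        delta["index"], {"id": None, "name": None, "arguments": ""})
    if delta.get("id"):                 # present only in the first chunk
        call["id"] = delta["id"]
    fn = delta.get("function") or {}
    if fn.get("name"):                  # likewise first-chunk only
        call["name"] = fn["name"]
    call["arguments"] += fn.get("arguments") or ""
```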
@@ -835,7 +846,7 @@ async def chat_completion_stream_generator(
                     total_tokens=num_prompt_tokens + completion_tokens,
                 )
 
-            data = chunk.model_dump_json(exclude_unset=True)
+            data = chunk.model_dump_json(exclude_none=True)
             yield f"data: {data}\n\n"
 
         # once the final token is handled, if stream_options.include_usage
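Switching from `exclude_unset` to `exclude_none` presumably ensures that optional fields left as `None` by the new delta construction (such as `id` and `type` on continuation chunks) are never serialized as explicit `null`s, which strict OpenAI clients may not accept. A minimal pydantic illustration with a hypothetical model (not vllm's actual `DeltaToolCall`):

```python
from typing import Optional
from pydantic import BaseModel

class Delta(BaseModel):
    index: int
    id: Optional[str] = None
    type: Optional[str] = None

d = Delta(index=0, id=None)                    # id explicitly set to None
print(d.model_dump_json(exclude_unset=True))   # {"index":0,"id":null}
print(d.model_dump_json(exclude_none=True))    # {"index":0}
```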