@@ -2322,7 +2322,7 @@ def generate_streaming(tools, functions, function_call, prompt):
     prompt = prompt
     stops = ["\n", END_ASSISTANT_TOKEN]

-    completion = create_completion(stop=stops)
+    completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
     completion_text = completion["choices"][0]["text"]
     completion_tokens += completion["usage"]["completion_tokens"]

@@ -2349,7 +2349,7 @@ def generate_streaming(tools, functions, function_call, prompt):
         completion_text.split(START_FUNCTION_CALL_TOKEN)[-1][:-1].strip()
     )
     grammar = get_grammar(function_calls[-1])
-    completion = create_completion(stop=END_FUNCTION_CALL_TOKEN)
+    completion = create_completion(prompt=prompt, stop=END_FUNCTION_CALL_TOKEN, grammar=grammar)
     completion_tokens += completion["usage"]["completion_tokens"]
     function_bodies.append(completion["choices"][0]["text"].strip())
     # If the prompt involves a function call, just append generated parameters to function_bodies
@@ -2363,7 +2363,7 @@ def generate_streaming(tools, functions, function_call, prompt):
     function_calls.append(function_call)
     grammar = get_grammar(function_call)
     stops = [STOP_TOKEN, FROM_TOKEN]
-    completion = create_completion(stop=stops)
+    completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
     completion_text = completion["choices"][0]["text"]
     completion_tokens += completion["usage"]["completion_tokens"]
     function_bodies.append(completion_text.strip())
@@ -2373,7 +2373,7 @@ def generate_streaming(tools, functions, function_call, prompt):
     # Generate function name first
     grammar = None
    stops = CONTENT_TOKEN
-    completion = create_completion(stop=stops)
+    completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
     completion_text = completion["choices"][0]["text"]
     completion_tokens += completion["usage"]["completion_tokens"]
     function_name = completion_text.strip()
@@ -2386,7 +2386,7 @@ def generate_streaming(tools, functions, function_call, prompt):
     grammar = get_grammar(function_call)
     # Generate content
     stops = [RECIPIENT_TOKEN, STOP_TOKEN]
-    completion = create_completion(stop=stops)
+    completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
     completion_text = completion["choices"][0]["text"]
     completion_tokens += completion["usage"]["completion_tokens"]
     if function_name == "all":
@@ -2413,7 +2413,7 @@ def generate_streaming(tools, functions, function_call, prompt):
     # Check whether the model wants to generate another turn
     prompt += completion_text.strip()
     grammar = None
-    completion = create_completion(stop=stops)
+    completion = create_completion(prompt=prompt, stop=stops, grammar=grammar)
     completion_tokens += completion["usage"]["completion_tokens"]
     if "<|from|> assistant" in completion["choices"][0]["text"] or "<|from|>assistant" in completion["choices"][0]["text"]:
         prompt += "\n<|from|>assistant\n<|recipient|>"
@@ -3564,4 +3564,4 @@ def chatml_function_calling(
         },
     }

-    raise ValueError("Automatic streaming tool choice is not supported")
+    raise ValueError("Automatic streaming tool choice is not supported")
0 commit comments