@@ -513,15 +513,30 @@ def _handle_streaming_tool_calls(
 
         # After completing the tool call parameters, continue with more completions
         # Recursively handle the next completion by starting a new generation
-        yield from _handle_streaming_tool_calls(
-            tools,
-            prompt + llama.tokenize((accumulated_text + "\n</tool_call>\n").encode("utf-8"), add_bos=False, special=True),
-            llama,
-            base_completion_kwargs,
-            stopping_criteria=stopping_criteria,
-            grammar=grammar,
-            tool_call_index=tool_call_index + 1  # Increment index for potential next tool call
-        )
+        # yield from _handle_streaming_tool_calls(
+        #     tools,
+        #     prompt + llama.tokenize((accumulated_text + "\n</tool_call>\n").encode("utf-8"), add_bos=False, special=True),
+        #     llama,
+        #     base_completion_kwargs,
+        #     stopping_criteria=stopping_criteria,
+        #     grammar=grammar,
+        #     tool_call_index=tool_call_index + 1  # Increment index for potential next tool call
+        # )
+        # Commented out recursive tool calling
+        # After completing the tool call parameters, we stop here
+        # Instead of recursively calling for more tool calls, we end normally
+        # Generate a finish_reason chunk to indicate completion
+        yield {
+            "id": "chat" + name_completion["id"],
+            "object": "chat.completion.chunk",
+            "created": name_completion["created"],
+            "model": name_completion["model"],
+            "choices": [{
+                "index": 0,
+                "delta": {},
+                "finish_reason": "stop"
+            }]
+        }
     except Exception as e:
         # Fall back to regular streaming without grammar
         fallback_prompt = prompt + llama.tokenize(accumulated_text.encode("utf-8"), add_bos=False, special=True)
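
Note: with this change the handler no longer recurses into further tool calls; it closes the stream with a single OpenAI-style chat.completion.chunk whose delta is empty and whose finish_reason is "stop". A minimal consumer sketch under that assumption (consume_stream is a hypothetical helper, and stream stands in for the generator this handler returns; neither name comes from the PR):

    def consume_stream(stream):
        """Collect streamed tool-call fragments until the closing chunk arrives."""
        fragments = []
        for chunk in stream:
            choice = chunk["choices"][0]
            if choice.get("finish_reason") == "stop":
                # With this patch, the handler emits exactly one final chunk
                # with an empty delta; no further tool calls follow it.
                break
            delta = choice.get("delta", {})
            # Accumulate partial tool-call name/arguments deltas as they stream in
            fragments.extend(delta.get("tool_calls", []))
        return fragments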