@@ -3975,23 +3975,9 @@ def chatml_function_calling(
39753975 ),
39763976 )
39773977 text = completion ["choices" ][0 ]["text" ]
3978- # Extract message content and/or function call
3979- tool_name = None
3980- message_content = None
3981-
3982- if text .startswith ("message:" ):
3983- # Handle message with or without function call
3984- parts = text .split ("<function_calls>" , 1 )
3985- message_content = parts [0 ][len ("message:" ):].strip ()
3986- if len (parts ) > 1 :
3987- # Has both message and function call
3988- function_text = parts [1 ].strip ()
3989- tool_name = function_text .split ("\n " )[0 ][len ("functions." ):].rstrip (":" )
3990- else :
3991- # Only function call
3992- tool_name = text .split ("\n " )[0 ][len ("<function_calls>\n functions." ):].rstrip (":" )
3978+ tool_name = None if text .startswith ("message" ) else text .split ("\n " )[- 1 ][len ("functions." ) :]
39933979
3994- # Case 2 step 2A: Message only
3980+ # Case 2 step 2A: Respond with a message
39953981 if tool_name is None :
39963982 prompt = template_renderer .render (
39973983 messages = messages , tools = [], tool_calls = None , add_generation_prompt = True
@@ -4081,7 +4067,7 @@ def chatml_function_calling(
40814067 ),
40824068 "message" : {
40834069 "role" : "assistant" ,
4084- "content" : message_content , # Include message content when present
4070+ "content" : None ,
40854071 "tool_calls" : [
40864072 {
40874073 "id" : "call_" + f"_{ i } _" + tool_name + "_" + completion ["id" ],
@@ -4157,15 +4143,14 @@ def gguf_function_calling(
41574143 Iterator [llama_types .CreateChatCompletionStreamResponse ],
41584144]:
41594145
4160- # Try to get model-specific template from metadata
4161- model_template = None
4146+ function_calling_template = None
41624147 if hasattr (llama , 'model_path' ):
41634148 metadata = llama .metadata
41644149 if metadata and "tokenizer.chat_template" in metadata :
4165- model_template = metadata ["tokenizer.chat_template" ]
4150+ function_calling_template = metadata ["tokenizer.chat_template" ]
4151+
41664152
4167- # Use model template or fall back to default
4168- function_calling_template = model_template if model_template else (
4153+ function_calling_template = (
41694154 "{% for message in messages %}"
41704155 "<|im_start|>{{ message.role }}\n "
41714156 # System message
@@ -4189,6 +4174,7 @@ def gguf_function_calling(
41894174 "\n functions.<function_name>:"
41904175 '\n { "arg1": "value1", "arg2": "value2" }'
41914176 "\n </function_calls>"
4177+ "\n \n You can also combine both formats to provide explanatory text with function calls."
41924178 "{% endif %}"
41934179 "<|im_end|>\n "
41944180 "{% endif %}"
@@ -4309,12 +4295,15 @@ def gguf_function_calling(
43094295 )
43104296 initial_gbnf_tool_grammar = (
43114297 (
4312- 'root ::= "<function_calls>" "\\ n" functions | "message:" text | "message:" text "<function_calls>" "\\ n" functions\n '
4298+ 'root ::= message_only | message_with_functions | functions_only\n '
4299+ 'message_only ::= "message:" text\n '
4300+ 'message_with_functions ::= "message:" text "<function_calls>\\ n" functions\n '
4301+ 'functions_only ::= "<function_calls>\\ n" functions\n '
43134302 'text ::= [^<]+\n '
43144303 f"functions ::= { function_names } \n "
43154304 )
43164305 if tool_choice == "auto"
4317- else f'root ::= "<function_calls>" " \\ n" functions\n functions ::= { function_names } \n '
4306+ else f'root ::= "<function_calls>\\ n" functions\n functions ::= { function_names } \n '
43184307 )
43194308 completion = cast (
43204309 llama_types .CreateCompletionResponse ,
@@ -4333,37 +4322,49 @@ def gguf_function_calling(
43334322 ),
43344323 )
43354324 text = completion ["choices" ][0 ]["text" ]
4336- # Extract message content and/or function call
4337- tool_name = None
4325+
4326+ # Parse the response to extract message and/or function calls
43384327 message_content = None
4339-
4328+ tool_name = None
4329+
43404330 if text .startswith ("message:" ):
4341- # Handle message with or without function call
4342- parts = text .split ("<function_calls>" , 1 )
4343- message_content = parts [0 ][len ("message:" ):].strip ()
4344- if len (parts ) > 1 :
4345- # Has both message and function call
4346- tool_name = parts [1 ].split ("\n " )[- 1 ][len ("functions." ) :]
4347- else :
4348- # Only function call
4349- tool_name = text .split ("\n " )[- 1 ][len ("functions." ) :]
4331+ # Extract message content
4332+ if "<function_calls>" in text :
4333+ # Combined message and function calls
4334+ parts = text .split ("<function_calls>" , 1 )
4335+ message_content = parts [0 ][len ("message:" ):].strip ()
4336+ if len (parts ) > 1 and "functions." in parts [1 ]:
4337+ tool_name = parts [1 ].split ("functions." , 1 )[1 ].split (":" , 1 )[0 ].strip ()
4338+ else :
4339+ # Message only
4340+ message_content = text [len ("message:" ):].strip ()
4341+ elif text .startswith ("<function_calls>" ) and "functions." in text :
4342+ # Function calls only
4343+ tool_name = text .split ("functions." , 1 )[1 ].split (":" , 1 )[0 ].strip ()
43504344
4351- # Case 2 step 2A: Respond with message only
4345+ # Case 2 step 2A: Respond with message only
43524346 if tool_name is None and message_content is not None :
4353- completion ["choices" ][0 ]["text" ] = message_content
4354- return _convert_completion_to_chat (completion , stream = stream )
4347+ prompt = template_renderer .render (
4348+ messages = messages , tools = [], tool_calls = None , add_generation_prompt = True
4349+ )
4350+ completion_response = llama .create_completion (
4351+ prompt = prompt ,
4352+ ** completion_kwargs , # type: ignore[arg-type]
4353+ logprobs = top_logprobs if logprobs else None ,
4354+ )
4355+ completion_response ["choices" ][0 ]["text" ] = message_content
4356+ return _convert_completion_to_chat (completion_response , stream = stream )
43554357
4356- # Case 2 step 2B: One or more function calls
4357- follow_up_gbnf_tool_grammar = (
4358- 'root ::= functions | "</function_calls>" | "<|im_end|>"\n '
4359- f"functions ::= { function_names } \n "
4358+ # Case 2 step 2B: One or more function calls
4359+ follow_up_gbnf_tool_grammar = (
4360+ 'root ::= functions | "</function_calls>" | "<|im_end|>"\n '
4361+ f"functions ::= { function_names } \n "
4362+ )
4363+ prompt += "<function_calls>\n "
4364+ if stream :
4365+ return _stream_tool_calls (
4366+ llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar
43604367 )
4361- prompt += "<function_calls>\n "
4362- if stream :
4363- return _stream_tool_calls (
4364- llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar
4365- )
4366-
43674368 tool = next ((tool for tool in tools if tool ["function" ]["name" ] == tool_name ), None )
43684369 completions : List [llama_types .CreateCompletionResponse ] = []
43694370 completions_tool_name : List [str ] = []
@@ -4430,7 +4431,7 @@ def gguf_function_calling(
44304431 ),
44314432 "message" : {
44324433 "role" : "assistant" ,
4433- "content" : message_content , # Include message content if present
4434+ "content" : None ,
44344435 "tool_calls" : [
44354436 {
44364437 "id" : "call_" + f"_{ i } _" + tool_name + "_" + completion ["id" ],
0 commit comments