@@ -4143,14 +4143,15 @@ def gguf_function_calling(
41434143 Iterator [llama_types .CreateChatCompletionStreamResponse ],
41444144]:
41454145
4146- function_calling_template = None
4146+ # Try to get model-specific template from metadata
4147+ model_template = None
41474148 if hasattr (llama , 'model_path' ):
41484149 metadata = llama .metadata
41494150 if metadata and "tokenizer.chat_template" in metadata :
4150- function_calling_template = metadata ["tokenizer.chat_template" ]
4151+ model_template = metadata ["tokenizer.chat_template" ]
41514152
4152-
4153- function_calling_template = (
4153+ # Use model template or fall back to default
4154+ function_calling_template = model_template if model_template else (
41544155 "{% for message in messages %}"
41554156 "<|im_start|>{{ message.role }}\n "
41564157 # System message
@@ -4294,7 +4295,11 @@ def gguf_function_calling(
42944295 )
42954296 initial_gbnf_tool_grammar = (
42964297 (
4297- 'root ::= "<function_calls>" "\\ n" functions | "message:"\n '
4298+ 'root ::= message_only | message_with_functions | functions_only\n '
4299+ 'message_only ::= "message:" text\n '
4300+ 'message_with_functions ::= "message:" text "<function_calls>\\ n" functions\n '
4301+ 'functions_only ::= "<function_calls>\\ n" functions\n '
4302+ 'text ::= [^<]+\n '
42984303 f"functions ::= { function_names } \n "
42994304 )
43004305 if tool_choice == "auto"
@@ -4317,32 +4322,37 @@ def gguf_function_calling(
43174322 ),
43184323 )
43194324 text = completion ["choices" ][0 ]["text" ]
4320- tool_name = None if text .startswith ("message" ) else text .split ("\n " )[- 1 ][len ("functions." ) :]
4325+ # Extract message content and/or function call
4326+ tool_name = None
4327+ message_content = None
4328+
4329+ if text .startswith ("message:" ):
4330+ # Handle message with or without function call
4331+ parts = text .split ("<function_calls>" , 1 )
4332+ message_content = parts [0 ][len ("message:" ):].strip ()
4333+ if len (parts ) > 1 :
4334+ # Has both message and function call
4335+ tool_name = parts [1 ].split ("\n " )[- 1 ][len ("functions." ) :]
4336+ else :
4337+ # Only function call
4338+ tool_name = text .split ("\n " )[- 1 ][len ("functions." ) :]
43214339
4322- # Case 2 step 2A: Respond with a message
4323- if tool_name is None :
4324- prompt = template_renderer .render (
4325- messages = messages , tools = [], tool_calls = None , add_generation_prompt = True
4326- )
4327- return _convert_completion_to_chat (
4328- llama .create_completion (
4329- prompt = prompt ,
4330- ** completion_kwargs , # type: ignore[arg-type]
4331- logprobs = top_logprobs if logprobs else None ,
4332- ),
4333- stream = stream ,
4334- )
4340+ # Case 2 step 2A: Respond with message only
4341+ if tool_name is None and message_content is not None :
4342+ completion ["choices" ][0 ]["text" ] = message_content
4343+ return _convert_completion_to_chat (completion , stream = stream )
43354344
4336- # Case 2 step 2B: One or more function calls
4337- follow_up_gbnf_tool_grammar = (
4338- 'root ::= functions | "</function_calls>" | "<|im_end|>"\n '
4339- f"functions ::= { function_names } \n "
4340- )
4341- prompt += "<function_calls>\n "
4342- if stream :
4343- return _stream_tool_calls (
4344- llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar
4345+ # Case 2 step 2B: One or more function calls
4346+ follow_up_gbnf_tool_grammar = (
4347+ 'root ::= functions | "</function_calls>" | "<|im_end|>"\n '
4348+ f"functions ::= { function_names } \n "
43454349 )
4350+ prompt += "<function_calls>\n "
4351+ if stream :
4352+ return _stream_tool_calls (
4353+ llama , prompt , tools , tool_name , completion_kwargs , follow_up_gbnf_tool_grammar
4354+ )
4355+
43464356 tool = next ((tool for tool in tools if tool ["function" ]["name" ] == tool_name ), None )
43474357 completions : List [llama_types .CreateCompletionResponse ] = []
43484358 completions_tool_name : List [str ] = []
@@ -4409,7 +4419,7 @@ def gguf_function_calling(
44094419 ),
44104420 "message" : {
44114421 "role" : "assistant" ,
4412- "content" : None ,
4422+ "content" : message_content , # Include message content if present
44134423 "tool_calls" : [
44144424 {
44154425 "id" : "call_" + f"_{ i } _" + tool_name + "_" + completion ["id" ],
0 commit comments