@@ -4107,7 +4107,6 @@ def chatml_function_calling(
         chat_completion["choices"][0]["message"]["function_call"] = single_function_call
     return chat_completion
 
-
 @register_chat_completion_handler("gguf-function-calling")
 def gguf_function_calling(
     llama: llama.Llama,
@@ -4142,18 +4141,9 @@ def gguf_function_calling(
     llama_types.CreateChatCompletionResponse,
     Iterator[llama_types.CreateChatCompletionStreamResponse],
 ]:
-
-    function_calling_template = None
-    if hasattr(llama, 'model_path'):
-        metadata = llama.metadata
-        if metadata and "tokenizer.chat_template" in metadata:
-            function_calling_template = metadata["tokenizer.chat_template"]
-
-
     function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"
-        # System message
         "{% if message.role == 'system' %}"
         "{{ message.content }}"
         "{% if tool_calls %}"
@@ -4178,22 +4168,16 @@ def gguf_function_calling(
41784168 "{% endif %}"
41794169 "<|im_end|>\n "
41804170 "{% endif %}"
4181- # User message
41824171 "{% if message.role == 'user' %}"
41834172 "{{ message.content }}"
41844173 "<|im_end|>\n "
41854174 "{% endif %}"
4186- # Assistant message
41874175 "{% if message.role == 'assistant' %}"
4188- ## Regular message
41894176 "{% if message.content and message.content | length > 0 %}"
4190- "{% if tool_calls %}"
4191- "message:\n "
4192- "{% endif %}"
4177+ "{% if tool_calls %}message:\n {% endif %}"
41934178 "{{ message.content }}"
41944179 "<|im_end|>\n "
41954180 "{% endif %}"
4196- ## Function calls
41974181 "{% if 'tool_calls' in message %}"
41984182 "{% for tool_call in message.tool_calls %}"
41994183 "functions.{{ tool_call.function.name }}:\n "
@@ -4210,27 +4194,23 @@ def gguf_function_calling(
         undefined=jinja2.StrictUndefined,
     ).from_string(function_calling_template)
 
-    # Convert legacy functions to tools
     if functions is not None:
         tools = [{"type": "function", "function": function} for function in functions]
 
-    # Convert legacy function_call to tool_choice
     if function_call is not None:
         if isinstance(function_call, str) and (function_call in ("none", "auto")):
             tool_choice = function_call
         if isinstance(function_call, dict) and "name" in function_call:
             tool_choice = {"type": "function", "function": {"name": function_call["name"]}}
 
-    # Collect the llama.create_completion keyword arguments so we don't have to repeat these with
-    # each completion call
     stop = (
         [stop, "<|im_end|>"]
         if isinstance(stop, str)
         else [*stop, "<|im_end|>"]
         if stop
         else ["<|im_end|>"]
     )
-    grammar = (  # It is assumed the grammar applies to messages only, not tool calls
+    grammar = (
         grammar
         if grammar is not None
         else (
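
The chained conditional that builds stop above is compact; a standalone restatement (the helper name and sample inputs are hypothetical, not part of this change) shows the three input shapes it normalizes:

    # Restates the stop-normalization expression from the hunk above so it can
    # be exercised in isolation.
    def normalize_stop(stop):
        return (
            [stop, "<|im_end|>"]
            if isinstance(stop, str)
            else [*stop, "<|im_end|>"]
            if stop
            else ["<|im_end|>"]
        )

    assert normalize_stop("###") == ["###", "<|im_end|>"]            # str: wrapped in a list
    assert normalize_stop(["###", "Q:"]) == ["###", "Q:", "<|im_end|>"]  # list: appended
    assert normalize_stop(None) == ["<|im_end|>"]                    # None/empty: default only
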
@@ -4260,7 +4240,6 @@ def gguf_function_calling(
42604240 "grammar" : grammar ,
42614241 }
42624242
4263- # Case 1: No tool use
42644243 if (
42654244 tool_choice is None
42664245 or (isinstance (tool_choice , str ) and tool_choice == "none" )
@@ -4273,18 +4252,15 @@ def gguf_function_calling(
         return _convert_completion_to_chat(
             llama.create_completion(
                 prompt=prompt,
-                **completion_kwargs,  # type: ignore[arg-type]
+                **completion_kwargs,
                 logprobs=top_logprobs if logprobs else None,
             ),
             stream=stream,
         )
 
-    # Ensure there is a system prompt to attach the tool metadata to
     if not any(message["role"] == "system" for message in messages):
         messages = [*messages, {"role": "system", "content": ""}]
 
-    # Case 2: Automatic or fixed tool choice
-    # Case 2 step 1: Determine whether to respond with a message or a tool call
     assert (isinstance(tool_choice, str) and tool_choice == "auto") or isinstance(tool_choice, dict)
     if isinstance(tool_choice, dict):
         tools = [t for t in tools if t["function"]["name"] == tool_choice["function"]["name"]]
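
When tool_choice pins a specific function, the filter above narrows tools to that single entry before any prompting happens. A small illustration with hypothetical schemas:

    tools = [
        {"type": "function", "function": {"name": "get_weather", "parameters": {}}},
        {"type": "function", "function": {"name": "get_time", "parameters": {}}},
    ]
    tool_choice = {"type": "function", "function": {"name": "get_time"}}
    tools = [t for t in tools if t["function"]["name"] == tool_choice["function"]["name"]]
    assert [t["function"]["name"] for t in tools] == ["get_time"]
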
@@ -4309,7 +4285,7 @@ def gguf_function_calling(
         llama_types.CreateCompletionResponse,
         llama.create_completion(
             prompt=prompt,
-            **{  # type: ignore[arg-type]
+            **{
                 **completion_kwargs,
                 "temperature": 0,
                 "stream": False,
@@ -4322,40 +4298,33 @@ def gguf_function_calling(
         ),
     )
     text = completion["choices"][0]["text"]
-
-    # Parse the response to extract message and/or function calls
+
     message_content = None
     tool_name = None
-
+
     if text.startswith("message:"):
-        # Extract message content
         if "<function_calls>" in text:
-            # Combined message and function calls
             parts = text.split("<function_calls>", 1)
             message_content = parts[0][len("message:"):].strip()
             if len(parts) > 1 and "functions." in parts[1]:
                 tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
         else:
-            # Message only
             message_content = text[len("message:"):].strip()
     elif text.startswith("<function_calls>") and "functions." in text:
-        # Function calls only
         tool_name = text.split("functions.", 1)[1].split(":", 1)[0].strip()
 
-    # Case 2 step 2A: Respond with message only
     if tool_name is None and message_content is not None:
         prompt = template_renderer.render(
             messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
         )
         completion_response = llama.create_completion(
             prompt=prompt,
-            **completion_kwargs,  # type: ignore[arg-type]
+            **completion_kwargs,
             logprobs=top_logprobs if logprobs else None,
         )
         completion_response["choices"][0]["text"] = message_content
         return _convert_completion_to_chat(completion_response, stream=stream)
 
-    # Case 2 step 2B: One or more function calls
     follow_up_gbnf_tool_grammar = (
         'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
         f"functions ::= {function_names}\n"
@@ -4369,7 +4338,6 @@ def gguf_function_calling(
     completions: List[llama_types.CreateCompletionResponse] = []
     completions_tool_name: List[str] = []
     while tool is not None and len(completions) <= 16:
-        # Generate the parameter values for the selected tool
         prompt += f"functions.{tool_name}:\n"
         try:
             grammar = llama_grammar.LlamaGrammar.from_json_schema(
@@ -4386,7 +4354,7 @@ def gguf_function_calling(
             )
         completion_or_chunks = llama.create_completion(
             prompt=prompt,
-            **{  # type: ignore[arg-type]
+            **{
                 **completion_kwargs,
                 "max_tokens": None,
                 "grammar": grammar,
@@ -4397,41 +4365,40 @@ def gguf_function_calling(
         completions_tool_name.append(tool_name)
         prompt += completion["choices"][0]["text"]
         prompt += "\n"
-        # Determine whether to call another tool or stop
         response = cast(
             llama_types.CreateCompletionResponse,
             llama.create_completion(
                 prompt=prompt,
-                **{  # type: ignore[arg-type]
+                **{
                     **completion_kwargs,
                     "temperature": 0,
                     "stream": False,
-                    "stop": [*completion_kwargs["stop"], ":", "</function_calls>"],  # type: ignore[misc]
+                    "stop": [*completion_kwargs["stop"], ":", "</function_calls>"],
                     "max_tokens": None,
                     "grammar": llama_grammar.LlamaGrammar.from_string(
                         follow_up_gbnf_tool_grammar, verbose=llama.verbose
                     ),
                 },
             ),
         )
-        tool_name = response["choices"][0]["text"][len("functions.") :]
-        tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
-    # Merge the completions into a single chat completion
+        tool_name = response["choices"][0]["text"][len("functions.") :] if response["choices"][0]["text"].startswith("functions.") else None
+        tool = next((tool for tool in tools if tool_name and tool["function"]["name"] == tool_name), None)
+
     chat_completion: llama_types.CreateChatCompletionResponse = {
         "id": "chat" + completion["id"],
         "object": "chat.completion",
         "created": completion["created"],
         "model": completion["model"],
         "choices": [
             {
-                "finish_reason": "tool_calls",
+                "finish_reason": "tool_calls" if completions else "stop",
                 "index": 0,
                 "logprobs": _convert_text_completion_logprobs_to_chat(
                     completion["choices"][0]["logprobs"]
                 ),
                 "message": {
                     "role": "assistant",
-                    "content": message_content,  # Include message content if present
+                    "content": message_content,
                     "tool_calls": [
                         {
                             "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
@@ -4444,7 +4411,9 @@ def gguf_function_calling(
                         for i, (tool_name, completion) in enumerate(
                             zip(completions_tool_name, completions)
                         )
-                    ],
+                    ]
+                    if completions
+                    else None,
                 },
             }
         ],
@@ -4465,8 +4434,8 @@ def gguf_function_calling(
     }
     if len(completions) == 1:
         single_function_call: llama_types.ChatCompletionResponseFunctionCall = {
-            "name": tool_name,
+            "name": completions_tool_name[0],
             "arguments": completions[0]["choices"][0]["text"],
         }
         chat_completion["choices"][0]["message"]["function_call"] = single_function_call
-    return chat_completion
+    return chat_completion
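
For reference, a minimal usage sketch of the new handler: the register_chat_completion_handler decorator makes it selectable by name through llama-cpp-python's chat_format argument, in the same way as the existing chatml-function-calling handler. The model path and tool schema below are placeholders:

    from llama_cpp import Llama

    llm = Llama(model_path="model.gguf", chat_format="gguf-function-calling")
    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": "What is the weather in Paris?"}],
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "get_weather",
                    "description": "Get the current weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ],
        tool_choice="auto",
    )
    print(response["choices"][0]["message"])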