
Commit dccabe6

improve gguf-function-calling parser

1 parent 8bf0f44

1 file changed: +20 −51

llama_cpp/llama_chat_format.py

Lines changed: 20 additions & 51 deletions
@@ -4107,7 +4107,6 @@ def chatml_function_calling(
         chat_completion["choices"][0]["message"]["function_call"] = single_function_call
     return chat_completion
 
-
 @register_chat_completion_handler("gguf-function-calling")
 def gguf_function_calling(
     llama: llama.Llama,
@@ -4142,18 +4141,9 @@ def gguf_function_calling(
     llama_types.CreateChatCompletionResponse,
     Iterator[llama_types.CreateChatCompletionStreamResponse],
 ]:
-
-    function_calling_template = None
-    if hasattr(llama, 'model_path'):
-        metadata = llama.metadata
-        if metadata and "tokenizer.chat_template" in metadata:
-            function_calling_template = metadata["tokenizer.chat_template"]
-
-
     function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"
-        # System message
         "{% if message.role == 'system' %}"
         "{{ message.content }}"
         "{% if tool_calls %}"
@@ -4178,22 +4168,16 @@ def gguf_function_calling(
         "{% endif %}"
         "<|im_end|>\n"
         "{% endif %}"
-        # User message
         "{% if message.role == 'user' %}"
         "{{ message.content }}"
         "<|im_end|>\n"
         "{% endif %}"
-        # Assistant message
         "{% if message.role == 'assistant' %}"
-        ## Regular message
         "{% if message.content and message.content | length > 0 %}"
-        "{% if tool_calls %}"
-        "message:\n"
-        "{% endif %}"
+        "{% if tool_calls %}message:\n{% endif %}"
         "{{ message.content }}"
         "<|im_end|>\n"
         "{% endif %}"
-        ## Function calls
         "{% if 'tool_calls' in message %}"
         "{% for tool_call in message.tool_calls %}"
         "functions.{{ tool_call.function.name }}:\n"
@@ -4210,27 +4194,23 @@ def gguf_function_calling(
         undefined=jinja2.StrictUndefined,
     ).from_string(function_calling_template)
 
-    # Convert legacy functions to tools
     if functions is not None:
         tools = [{"type": "function", "function": function} for function in functions]
 
-    # Convert legacy function_call to tool_choice
     if function_call is not None:
         if isinstance(function_call, str) and (function_call in ("none", "auto")):
             tool_choice = function_call
         if isinstance(function_call, dict) and "name" in function_call:
             tool_choice = {"type": "function", "function": {"name": function_call["name"]}}
 
-    # Collect the llama.create_completion keyword arguments so we don't have to repeat these with
-    # each completion call
     stop = (
         [stop, "<|im_end|>"]
         if isinstance(stop, str)
         else [*stop, "<|im_end|>"]
         if stop
         else ["<|im_end|>"]
     )
-    grammar = ( # It is assumed the grammar applies to messages only, not tool calls
+    grammar = (
         grammar
         if grammar is not None
         else (
@@ -4260,7 +4240,6 @@ def gguf_function_calling(
         "grammar": grammar,
     }
 
-    # Case 1: No tool use
     if (
         tool_choice is None
         or (isinstance(tool_choice, str) and tool_choice == "none")
@@ -4273,18 +4252,15 @@ def gguf_function_calling(
         return _convert_completion_to_chat(
             llama.create_completion(
                 prompt=prompt,
-                **completion_kwargs, # type: ignore[arg-type]
+                **completion_kwargs,
                 logprobs=top_logprobs if logprobs else None,
             ),
             stream=stream,
         )
 
-    # Ensure there is a system prompt to attach the tool metadata to
     if not any(message["role"] == "system" for message in messages):
         messages = [*messages, {"role": "system", "content": ""}]
 
-    # Case 2: Automatic or fixed tool choice
-    # Case 2 step 1: Determine whether to respond with a message or a tool call
     assert (isinstance(tool_choice, str) and tool_choice == "auto") or isinstance(tool_choice, dict)
     if isinstance(tool_choice, dict):
         tools = [t for t in tools if t["function"]["name"] == tool_choice["function"]["name"]]
@@ -4309,7 +4285,7 @@ def gguf_function_calling(
         llama_types.CreateCompletionResponse,
         llama.create_completion(
             prompt=prompt,
-            **{ # type: ignore[arg-type]
+            **{
                 **completion_kwargs,
                 "temperature": 0,
                 "stream": False,
@@ -4322,40 +4298,33 @@ def gguf_function_calling(
         ),
     )
     text = completion["choices"][0]["text"]
-
-    # Parse the response to extract message and/or function calls
+
     message_content = None
     tool_name = None
-
+
     if text.startswith("message:"):
-        # Extract message content
         if "<function_calls>" in text:
-            # Combined message and function calls
            parts = text.split("<function_calls>", 1)
            message_content = parts[0][len("message:"):].strip()
            if len(parts) > 1 and "functions." in parts[1]:
                tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
        else:
-            # Message only
            message_content = text[len("message:"):].strip()
    elif text.startswith("<function_calls>") and "functions." in text:
-        # Function calls only
        tool_name = text.split("functions.", 1)[1].split(":", 1)[0].strip()
 
-    # Case 2 step 2A: Respond with message only
     if tool_name is None and message_content is not None:
         prompt = template_renderer.render(
             messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
         )
         completion_response = llama.create_completion(
             prompt=prompt,
-            **completion_kwargs, # type: ignore[arg-type]
+            **completion_kwargs,
             logprobs=top_logprobs if logprobs else None,
         )
         completion_response["choices"][0]["text"] = message_content
         return _convert_completion_to_chat(completion_response, stream=stream)
 
-    # Case 2 step 2B: One or more function calls
     follow_up_gbnf_tool_grammar = (
         'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
         f"functions ::= {function_names}\n"
@@ -4369,7 +4338,6 @@ def gguf_function_calling(
     completions: List[llama_types.CreateCompletionResponse] = []
     completions_tool_name: List[str] = []
     while tool is not None and len(completions) <= 16:
-        # Generate the parameter values for the selected tool
         prompt += f"functions.{tool_name}:\n"
         try:
             grammar = llama_grammar.LlamaGrammar.from_json_schema(
@@ -4386,7 +4354,7 @@ def gguf_function_calling(
             )
         completion_or_chunks = llama.create_completion(
             prompt=prompt,
-            **{ # type: ignore[arg-type]
+            **{
                 **completion_kwargs,
                 "max_tokens": None,
                 "grammar": grammar,
@@ -4397,41 +4365,40 @@ def gguf_function_calling(
         completions_tool_name.append(tool_name)
         prompt += completion["choices"][0]["text"]
         prompt += "\n"
-        # Determine whether to call another tool or stop
         response = cast(
             llama_types.CreateCompletionResponse,
             llama.create_completion(
                 prompt=prompt,
-                **{ # type: ignore[arg-type]
+                **{
                     **completion_kwargs,
                     "temperature": 0,
                     "stream": False,
-                    "stop": [*completion_kwargs["stop"], ":", "</function_calls>"], # type: ignore[misc]
+                    "stop": [*completion_kwargs["stop"], ":", "</function_calls>"],
                     "max_tokens": None,
                     "grammar": llama_grammar.LlamaGrammar.from_string(
                         follow_up_gbnf_tool_grammar, verbose=llama.verbose
                     ),
                 },
             ),
         )
-        tool_name = response["choices"][0]["text"][len("functions.") :]
-        tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
-    # Merge the completions into a single chat completion
+        tool_name = response["choices"][0]["text"][len("functions.") :] if response["choices"][0]["text"].startswith("functions.") else None
+        tool = next((tool for tool in tools if tool_name and tool["function"]["name"] == tool_name), None)
+
     chat_completion: llama_types.CreateChatCompletionResponse = {
         "id": "chat" + completion["id"],
         "object": "chat.completion",
         "created": completion["created"],
         "model": completion["model"],
         "choices": [
             {
-                "finish_reason": "tool_calls",
+                "finish_reason": "tool_calls" if completions else "stop",
                 "index": 0,
                 "logprobs": _convert_text_completion_logprobs_to_chat(
                     completion["choices"][0]["logprobs"]
                 ),
                 "message": {
                     "role": "assistant",
-                    "content": message_content, # Include message content if present
+                    "content": message_content,
                     "tool_calls": [
                         {
                             "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
@@ -4444,7 +4411,9 @@ def gguf_function_calling(
                         for i, (tool_name, completion) in enumerate(
                             zip(completions_tool_name, completions)
                         )
-                    ],
+                    ]
+                    if completions
+                    else None,
                 },
             }
         ],
@@ -4465,8 +4434,8 @@ def gguf_function_calling(
     }
     if len(completions) == 1:
         single_function_call: llama_types.ChatCompletionResponseFunctionCall = {
-            "name": tool_name,
+            "name": completions_tool_name[0],
             "arguments": completions[0]["choices"][0]["text"],
         }
         chat_completion["choices"][0]["message"]["function_call"] = single_function_call
-    return chat_completion
+    return chat_completion
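
For reference, a minimal standalone sketch of the response-parsing convention this commit tightens. The helper name parse_generated_text and the sample strings are illustrative only (not part of the diff); the logic mirrors the parsing block in the hunks above, where the model's first turn starts with "message:" for plain text, "<function_calls>" for tool use, or combines both.

    from typing import Optional, Tuple

    def parse_generated_text(text: str) -> Tuple[Optional[str], Optional[str]]:
        """Return (message_content, first tool name) parsed from raw model text.

        Illustrative sketch mirroring the parsing block in gguf_function_calling.
        """
        message_content = None
        tool_name = None
        if text.startswith("message:"):
            if "<function_calls>" in text:
                # Combined message and function calls.
                parts = text.split("<function_calls>", 1)
                message_content = parts[0][len("message:"):].strip()
                if len(parts) > 1 and "functions." in parts[1]:
                    tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
            else:
                # Message only.
                message_content = text[len("message:"):].strip()
        elif text.startswith("<function_calls>") and "functions." in text:
            # Function calls only.
            tool_name = text.split("functions.", 1)[1].split(":", 1)[0].strip()
        return message_content, tool_name

    assert parse_generated_text("message: hi there") == ("hi there", None)
    assert parse_generated_text("<function_calls>\nfunctions.get_weather:\n{}") == (None, "get_weather")
    assert parse_generated_text("message: checking\n<function_calls>\nfunctions.get_weather:\n{}") == ("checking", "get_weather")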
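And a hedged end-to-end usage sketch: the model path and the get_weather tool below are placeholders, but the chat format name comes straight from the @register_chat_completion_handler("gguf-function-calling") decorator in this diff. When the model produces no tool call, the parser now reports finish_reason "stop" with tool_calls set to None, instead of always claiming "tool_calls" (see the finish_reason and tool_calls hunks above).

    from llama_cpp import Llama

    # Placeholder model path; any GGUF chat model should do for this sketch.
    llm = Llama(model_path="./models/model.gguf", chat_format="gguf-function-calling")

    response = llm.create_chat_completion(
        messages=[{"role": "user", "content": "What's the weather in Paris?"}],
        tools=[
            {
                "type": "function",
                "function": {
                    "name": "get_weather",  # illustrative tool, not part of the diff
                    "description": "Get the current weather for a city",
                    "parameters": {
                        "type": "object",
                        "properties": {"city": {"type": "string"}},
                        "required": ["city"],
                    },
                },
            }
        ],
        tool_choice="auto",
    )

    # "tool_calls" when the model called a function, "stop" for a plain message.
    print(response["choices"][0]["finish_reason"])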
