
Commit 640e59f

improve gguf-function-calling parser
1 parent: b201d43

File tree

1 file changed: +51 −50 lines changed

llama_cpp/llama_chat_format.py

Lines changed: 51 additions & 50 deletions
@@ -3975,23 +3975,9 @@ def chatml_function_calling(
             ),
         )
         text = completion["choices"][0]["text"]
-    # Extract message content and/or function call
-    tool_name = None
-    message_content = None
-
-    if text.startswith("message:"):
-        # Handle message with or without function call
-        parts = text.split("<function_calls>", 1)
-        message_content = parts[0][len("message:"):].strip()
-        if len(parts) > 1:
-            # Has both message and function call
-            function_text = parts[1].strip()
-            tool_name = function_text.split("\n")[0][len("functions."):].rstrip(":")
-        else:
-            # Only function call
-            tool_name = text.split("\n")[0][len("<function_calls>\nfunctions."):].rstrip(":")
+    tool_name = None if text.startswith("message") else text.split("\n")[-1][len("functions.") :]
 
-    # Case 2 step 2A: Message only
+    # Case 2 step 2A: Respond with a message
     if tool_name is None:
         prompt = template_renderer.render(
             messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
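
Note: the one-liner above relies on the initial grammar, which constrains the completion to either a "message:" prefix or a function-call block whose last line names the function. A minimal standalone sketch (sample strings are hypothetical, and whether a trailing ":" survives depends on what the function_names rule emits; the old code stripped it with .rstrip(":")):

for text in ("message: Hello!", "<function_calls>\nfunctions.get_weather"):
    tool_name = None if text.startswith("message") else text.split("\n")[-1][len("functions.") :]
    print(tool_name)  # None for the first sample, "get_weather" for the second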
@@ -4081,7 +4067,7 @@ def chatml_function_calling(
             ),
             "message": {
                 "role": "assistant",
-                "content": message_content,  # Include message content when present
+                "content": None,
                 "tool_calls": [
                     {
                         "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
@@ -4157,15 +4143,14 @@ def gguf_function_calling(
     Iterator[llama_types.CreateChatCompletionStreamResponse],
 ]:
 
-    # Try to get model-specific template from metadata
-    model_template = None
+    function_calling_template = None
     if hasattr(llama, 'model_path'):
         metadata = llama.metadata
         if metadata and "tokenizer.chat_template" in metadata:
-            model_template = metadata["tokenizer.chat_template"]
+            function_calling_template = metadata["tokenizer.chat_template"]
+
 
-    # Use model template or fall back to default
-    function_calling_template = model_template if model_template else (
+    function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"
         # System message
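
For context, a sketch of the metadata lookup above in isolation, assuming a hypothetical model path; llama.metadata is the GGUF key/value dict that tokenizer.chat_template is read from:

from llama_cpp import Llama

llama = Llama(model_path="./models/example.gguf", verbose=False)  # hypothetical path
function_calling_template = None
if hasattr(llama, 'model_path'):
    metadata = llama.metadata
    if metadata and "tokenizer.chat_template" in metadata:
        function_calling_template = metadata["tokenizer.chat_template"]
# As committed, the next statement in the diff reassigns function_calling_template
# to the built-in default unconditionally, so the metadata value is not used afterwards.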
@@ -4189,6 +4174,7 @@ def gguf_function_calling(
         "\nfunctions.<function_name>:"
         '\n{ "arg1": "value1", "arg2": "value2" }'
         "\n</function_calls>"
+        "\n\nYou can also combine both formats to provide explanatory text with function calls."
         "{% endif %}"
         "<|im_end|>\n"
         "{% endif %}"
@@ -4309,12 +4295,15 @@ def gguf_function_calling(
     )
     initial_gbnf_tool_grammar = (
         (
-            'root ::= "<function_calls>" "\\n" functions | "message:" text | "message:" text "<function_calls>" "\\n" functions\n'
+            'root ::= message_only | message_with_functions | functions_only\n'
+            'message_only ::= "message:" text\n'
+            'message_with_functions ::= "message:" text "<function_calls>\\n" functions\n'
+            'functions_only ::= "<function_calls>\\n" functions\n'
             'text ::= [^<]+\n'
             f"functions ::= {function_names}\n"
         )
         if tool_choice == "auto"
-        else f'root ::= "<function_calls>" "\\n" functions\nfunctions ::= {function_names}\n'
+        else f'root ::= "<function_calls>\\n" functions\nfunctions ::= {function_names}\n'
     )
     completion = cast(
         llama_types.CreateCompletionResponse,
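
To see what the restructured rules expand to, here is the tool_choice == "auto" grammar assembled with a hypothetical function_names value:

function_names = '"functions.get_weather:"'  # hypothetical; built from the tools list
initial_gbnf_tool_grammar = (
    'root ::= message_only | message_with_functions | functions_only\n'
    'message_only ::= "message:" text\n'
    'message_with_functions ::= "message:" text "<function_calls>\\n" functions\n'
    'functions_only ::= "<function_calls>\\n" functions\n'
    'text ::= [^<]+\n'
    f"functions ::= {function_names}\n"
)
print(initial_gbnf_tool_grammar)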
@@ -4333,37 +4322,49 @@ def gguf_function_calling(
         ),
     )
     text = completion["choices"][0]["text"]
-    # Extract message content and/or function call
-    tool_name = None
+
+    # Parse the response to extract message and/or function calls
     message_content = None
-
+    tool_name = None
+
     if text.startswith("message:"):
-        # Handle message with or without function call
-        parts = text.split("<function_calls>", 1)
-        message_content = parts[0][len("message:"):].strip()
-        if len(parts) > 1:
-            # Has both message and function call
-            tool_name = parts[1].split("\n")[-1][len("functions.") :]
-        else:
-            # Only function call
-            tool_name = text.split("\n")[-1][len("functions.") :]
+        # Extract message content
+        if "<function_calls>" in text:
+            # Combined message and function calls
+            parts = text.split("<function_calls>", 1)
+            message_content = parts[0][len("message:"):].strip()
+            if len(parts) > 1 and "functions." in parts[1]:
+                tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
+        else:
+            # Message only
+            message_content = text[len("message:"):].strip()
+    elif text.startswith("<function_calls>") and "functions." in text:
+        # Function calls only
+        tool_name = text.split("functions.", 1)[1].split(":", 1)[0].strip()
 
-    # Case 2 step 2A: Respond with message only
+    # Case 2 step 2A: Respond with message only
     if tool_name is None and message_content is not None:
-        completion["choices"][0]["text"] = message_content
-        return _convert_completion_to_chat(completion, stream=stream)
+        prompt = template_renderer.render(
+            messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
+        )
+        completion_response = llama.create_completion(
+            prompt=prompt,
+            **completion_kwargs,  # type: ignore[arg-type]
+            logprobs=top_logprobs if logprobs else None,
+        )
+        completion_response["choices"][0]["text"] = message_content
+        return _convert_completion_to_chat(completion_response, stream=stream)
 
-    # Case 2 step 2B: One or more function calls
-    follow_up_gbnf_tool_grammar = (
-        'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
-        f"functions ::= {function_names}\n"
+    # Case 2 step 2B: One or more function calls
+    follow_up_gbnf_tool_grammar = (
+        'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
+        f"functions ::= {function_names}\n"
+    )
+    prompt += "<function_calls>\n"
+    if stream:
+        return _stream_tool_calls(
+            llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar
     )
-    prompt += "<function_calls>\n"
-    if stream:
-        return _stream_tool_calls(
-            llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar
-    )
-
     tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
     completions: List[llama_types.CreateCompletionResponse] = []
     completions_tool_name: List[str] = []
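
A standalone sketch of the parsing logic added above, exercised against the three shapes the initial grammar permits (sample texts are hypothetical):

def parse(text):
    message_content = None
    tool_name = None
    if text.startswith("message:"):
        if "<function_calls>" in text:
            # Combined message and function calls
            parts = text.split("<function_calls>", 1)
            message_content = parts[0][len("message:"):].strip()
            if len(parts) > 1 and "functions." in parts[1]:
                tool_name = parts[1].split("functions.", 1)[1].split(":", 1)[0].strip()
        else:
            # Message only
            message_content = text[len("message:"):].strip()
    elif text.startswith("<function_calls>") and "functions." in text:
        # Function calls only
        tool_name = text.split("functions.", 1)[1].split(":", 1)[0].strip()
    return message_content, tool_name

print(parse("message: Hi there."))  # ('Hi there.', None)
print(parse("message: One sec.<function_calls>\nfunctions.get_weather:"))  # ('One sec.', 'get_weather')
print(parse("<function_calls>\nfunctions.get_weather:"))  # (None, 'get_weather')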
@@ -4430,7 +4431,7 @@ def gguf_function_calling(
             ),
             "message": {
                 "role": "assistant",
-                "content": message_content,  # Include message content if present
+                "content": None,
                 "tool_calls": [
                     {
                         "id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],
