Skip to content

Commit c3112c7

Browse files
committed
improve gguf-function-calling parser
1 parent 066638c commit c3112c7

File tree

1 file changed

+39
-29
lines changed

1 file changed

+39
-29
lines changed

llama_cpp/llama_chat_format.py

Lines changed: 39 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -4143,14 +4143,15 @@ def gguf_function_calling(
41434143
Iterator[llama_types.CreateChatCompletionStreamResponse],
41444144
]:
41454145

4146-
function_calling_template = None
4146+
# Try to get model-specific template from metadata
4147+
model_template = None
41474148
if hasattr(llama, 'model_path'):
41484149
metadata = llama.metadata
41494150
if metadata and "tokenizer.chat_template" in metadata:
4150-
function_calling_template = metadata["tokenizer.chat_template"]
4151+
model_template = metadata["tokenizer.chat_template"]
41514152

4152-
4153-
function_calling_template = (
4153+
# Use model template or fall back to default
4154+
function_calling_template = model_template if model_template else (
41544155
"{% for message in messages %}"
41554156
"<|im_start|>{{ message.role }}\n"
41564157
# System message
@@ -4294,7 +4295,11 @@ def gguf_function_calling(
42944295
)
42954296
initial_gbnf_tool_grammar = (
42964297
(
4297-
'root ::= "<function_calls>" "\\n" functions | "message:"\n'
4298+
'root ::= message_only | message_with_functions | functions_only\n'
4299+
'message_only ::= "message:" text\n'
4300+
'message_with_functions ::= "message:" text "<function_calls>\\n" functions\n'
4301+
'functions_only ::= "<function_calls>\\n" functions\n'
4302+
'text ::= [^<]+\n'
42984303
f"functions ::= {function_names}\n"
42994304
)
43004305
if tool_choice == "auto"
@@ -4317,32 +4322,37 @@ def gguf_function_calling(
43174322
),
43184323
)
43194324
text = completion["choices"][0]["text"]
4320-
tool_name = None if text.startswith("message") else text.split("\n")[-1][len("functions.") :]
4325+
# Extract message content and/or function call
4326+
tool_name = None
4327+
message_content = None
4328+
4329+
if text.startswith("message:"):
4330+
# Handle message with or without function call
4331+
parts = text.split("<function_calls>", 1)
4332+
message_content = parts[0][len("message:"):].strip()
4333+
if len(parts) > 1:
4334+
# Has both message and function call
4335+
tool_name = parts[1].split("\n")[-1][len("functions.") :]
4336+
else:
4337+
# Only function call
4338+
tool_name = text.split("\n")[-1][len("functions.") :]
43214339

4322-
# Case 2 step 2A: Respond with a message
4323-
if tool_name is None:
4324-
prompt = template_renderer.render(
4325-
messages=messages, tools=[], tool_calls=None, add_generation_prompt=True
4326-
)
4327-
return _convert_completion_to_chat(
4328-
llama.create_completion(
4329-
prompt=prompt,
4330-
**completion_kwargs, # type: ignore[arg-type]
4331-
logprobs=top_logprobs if logprobs else None,
4332-
),
4333-
stream=stream,
4334-
)
4340+
# Case 2 step 2A: Respond with message only
4341+
if tool_name is None and message_content is not None:
4342+
completion["choices"][0]["text"] = message_content
4343+
return _convert_completion_to_chat(completion, stream=stream)
43354344

4336-
# Case 2 step 2B: One or more function calls
4337-
follow_up_gbnf_tool_grammar = (
4338-
'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
4339-
f"functions ::= {function_names}\n"
4340-
)
4341-
prompt += "<function_calls>\n"
4342-
if stream:
4343-
return _stream_tool_calls(
4344-
llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar
4345+
# Case 2 step 2B: One or more function calls
4346+
follow_up_gbnf_tool_grammar = (
4347+
'root ::= functions | "</function_calls>" | "<|im_end|>"\n'
4348+
f"functions ::= {function_names}\n"
43454349
)
4350+
prompt += "<function_calls>\n"
4351+
if stream:
4352+
return _stream_tool_calls(
4353+
llama, prompt, tools, tool_name, completion_kwargs, follow_up_gbnf_tool_grammar
4354+
)
4355+
43464356
tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
43474357
completions: List[llama_types.CreateCompletionResponse] = []
43484358
completions_tool_name: List[str] = []
@@ -4409,7 +4419,7 @@ def gguf_function_calling(
44094419
),
44104420
"message": {
44114421
"role": "assistant",
4412-
"content": None,
4422+
"content": message_content, # Include message content if present
44134423
"tool_calls": [
44144424
{
44154425
"id": "call_" + f"_{i}_" + tool_name + "_" + completion["id"],

0 commit comments

Comments
 (0)