Improve gguf-function-calling parser: rework the tools prompt template and let model metadata or a `chat_template` kwarg override it

okaris · okaris · commit a7d162b0919e · 2025-09-17T21:29:05.000Z
diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py
@@ -4144,31 +4144,35 @@ def gguf_function_calling(
 ]:
     
     function_calling_template = None
-    if hasattr(llama, 'model_path'):
-        metadata = llama.metadata
-        if metadata and "tokenizer.chat_template" in metadata:
-            function_calling_template = metadata["tokenizer.chat_template"]
-
-
+    
+    
     function_calling_template = (
         "{% for message in messages %}"
         "<|im_start|>{{ message.role }}\n"
         # System message
         "{% if message.role == 'system' %}"
+        "<|system|>\n"
         "{{ message.content }}"
-        "{% if tool_calls %}"
-        "\n\nYou have access to the following functions:\n"
+        "{% if tools %}"
+        "\n# Tools\n"
+        "\nYou may call one or more functions to assist with the user query."
+        "\nPrefer proposing the function first; only emit `<function_calls>` when user intent to run is clear from context."
+        "\n\nSoft consent policy:"
+        "\n- Proceed immediately if the user explicitly asks to run/fetch/call/use a tool, or previously agreed, or supplied all required parameters."
+        "\n- Otherwise: reply with a one-line proposal naming the function and why, and end with “Proceed?”."
+        "\n- Do not over-ask if consent is obvious."
+        "\n\nYou are provided with function signatures within <tools></tools> XML tags:"
+        "\n<tools>"
         "{% for tool in tools %}"
-        '\n{% if tool.function.get("description") %}/* {{ tool.function.description | trim }} */{% endif %}'
-        "\nfunctions.{{ tool.function.name }}:\n"
-        "{{ tool.function.parameters | tojson }}"
-        "\n{% endfor %}"
-        "\nYou must respond to user messages with either a single message or with one or more function calls."
-        "\n\nTo respond with a message use the following format:"
-        "\n\nmessage:"
-        "\n<message>"
-        "\n\nTo respond with one or more function calls use the following format:"
-        "\n\n<function_calls>"
+        "{{ tool | tojson }}"
+        "{% endfor %}"
+        "</tools>"
+        "\n\nYou can respond in two ways:"
+        "\n\n1. Message only (proposal/confirmation step):"
+        "\nmessage:"
+        "\nI can use `ExampleFunction` to retrieve that. Proceed?"
+        "\n\n2. Message + function calls (when intent is clearly allowed by context):"
+        "\nmessage:"
         "\nfunctions.<function_name>:"
         '\n{ "arg1": "value1", "arg2": "value2" }'
         "\nfunctions.<function_name>:"
@@ -4204,6 +4208,15 @@ def gguf_function_calling(
         "{% endfor %}"
         "{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
     )
+    
+    if hasattr(llama, 'model_path'):
+        metadata = llama.metadata
+        if metadata and "tokenizer.chat_template" in metadata:
+            function_calling_template = metadata["tokenizer.chat_template"]
+            
+    if kwargs.get('chat_template'):
+        function_calling_template = kwargs.get('chat_template')
+
     template_renderer = ImmutableSandboxedEnvironment(
         autoescape=jinja2.select_autoescape(["html", "xml"]),
         undefined=jinja2.StrictUndefined,