Commit a345aa9

Merge branch 'llamacli-tools-sse' into llamacli-tools
2 parents: 608304f + a9e3404

8 files changed: +198 −171 lines changed

common/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -75,6 +75,8 @@ add_library(${TARGET} STATIC
     sampling.h
     speculative.cpp
    speculative.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/toolcall/params.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/toolcall/params.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/toolcall/handler.cpp
     ${CMAKE_CURRENT_SOURCE_DIR}/toolcall/handler.hpp
     ${CMAKE_CURRENT_SOURCE_DIR}/toolcall/mcp_messages.cpp

common/common.cpp

Lines changed: 46 additions & 68 deletions
@@ -1768,42 +1768,18 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
     return text;
 }
 
-//
-// Chat template utils
-//
-
-bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
-    if (use_jinja) {
-        try {
-            auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
-            common_chat_inputs inputs;
-            inputs.messages = json::array({{
-                {"role", "user"},
-                {"content", "test"},
-            }});
-            common_chat_params_init(chat_template, inputs);
-            return true;
-        } catch (const std::exception & e) {
-            LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
-            return false;
-        }
-    }
-    llama_chat_message chat[] = {{"user", "test"}};
-    const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
-    return res >= 0;
-}
-
-static void copy_chat_params(const common_chat_params & src, toolcall::sampling_updater * update_sparams)
+void common_chat_grammar_to_sampler(const common_chat_params * src,
+                                    const llama_vocab * vocab,
+                                    common_params_sampling * sparams)
 {
-    GGML_ASSERT(update_sparams && update_sparams->sparams && update_sparams->vocab);
+    GGML_ASSERT(src && vocab && sparams);
 
-    auto & dst = *update_sparams->sparams;
-    auto vocab = update_sparams->vocab;
+    auto & dst = *sparams;
 
-    dst.grammar = src.grammar;
-    dst.grammar_lazy = src.grammar_lazy;
+    dst.grammar = src->grammar;
+    dst.grammar_lazy = src->grammar_lazy;
 
-    for (const auto & trigger : src.grammar_triggers) {
+    for (const auto & trigger : src->grammar_triggers) {
         auto ids = common_tokenize(vocab, trigger.word, false, true);
 
         if (ids.size() == 1) {

@@ -1816,7 +1792,7 @@ static void copy_chat_params(const common_chat_params & src, toolcall::sampling_
         dst.grammar_trigger_words.push_back(trigger);
     }
 
-    for (const auto & preserved : src.preserved_tokens) {
+    for (const auto & preserved : src->preserved_tokens) {
         auto ids = common_tokenize(vocab, preserved, false, true);
         if (ids.size() == 1) {
             LOG_DBG("Preserved token: %d\n", ids[0]);

@@ -1831,19 +1807,45 @@ static void copy_chat_params(const common_chat_params & src, toolcall::sampling_
     }
 }
 
+
+//
+// Chat template utils
+//
+
+bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
+    if (use_jinja) {
+        try {
+            auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
+            common_chat_inputs inputs;
+            inputs.messages = json::array({{
+                {"role", "user"},
+                {"content", "test"},
+            }});
+            common_chat_params_init(chat_template, inputs);
+            return true;
+        } catch (const std::exception & e) {
+            LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
+            return false;
+        }
+    }
+    llama_chat_message chat[] = {{"user", "test"}};
+    const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0);
+    return res >= 0;
+}
+
 std::string common_chat_apply_template(
         const common_chat_templates & tmpl,
         const std::vector<common_chat_msg> & msgs,
         bool add_ass,
         bool use_jinja,
-        toolcall::handler::ptr handler,
-        toolcall::sampling_updater * update_sparams)
+        const common_chat_inputs * inputs_,
+        common_chat_params * out_params)
 {
-    bool use_tool_template = (use_jinja && handler != nullptr) && tmpl.template_tool_use;
+    bool use_tool_template = use_jinja && tmpl.template_tool_use;
     const auto & tmpl_selected = use_tool_template ? *tmpl.template_tool_use : *tmpl.template_default;
 
     if (use_jinja) {
-        common_chat_inputs inputs;
+        common_chat_inputs inputs = inputs_ ? *inputs_ : common_chat_inputs();
 
         auto messages = json::array();
         for (const auto & msg : msgs) {

@@ -1852,35 +1854,11 @@ std::string common_chat_apply_template(
         inputs.messages = messages;
         inputs.add_generation_prompt = add_ass;
 
-        if (handler != nullptr) {
-            auto choice = handler->tool_choice();
-            if (std::holds_alternative<std::string>(choice)) {
-                inputs.tool_choice = std::get<std::string>(choice);
-
-            } else {
-                auto choice_ptr = std::get<toolcall::json_ptr>(choice);
-                if (choice_ptr != nullptr) {
-                    inputs.tool_choice = *choice_ptr;
-                }
-            }
-
-            inputs.tools = handler->tool_list();
-        }
-
         auto chat_params = common_chat_params_init(tmpl_selected, inputs);
-        if (update_sparams) {
-            copy_chat_params(chat_params, update_sparams);
-        }
-
-        auto prompt = chat_params.prompt;
-        if (handler != nullptr) {
-            json response;
-            handler->call(prompt, response);
-            return response; // Caller will determine what to do based upon last_action
-
-        } else {
-            return prompt;
+        if (out_params != nullptr) {
+            *out_params = chat_params;
         }
+        return chat_params.prompt;
     }
 
     int alloc_size = 0;

@@ -1918,12 +1896,12 @@ std::string common_chat_format_single(
         const common_chat_msg & new_msg,
         bool add_ass,
         bool use_jinja,
-        toolcall::handler::ptr handler,
-        toolcall::sampling_updater * update_sparams)
+        const common_chat_inputs * inputs,
+        common_chat_params * out_params)
 {
     std::ostringstream ss;
     auto fmt_past_msg = past_msg.empty() ? ""
-        : common_chat_apply_template(tmpl, past_msg, false, use_jinja, handler, update_sparams);
+        : common_chat_apply_template(tmpl, past_msg, false, use_jinja, inputs);
 
     std::vector<common_chat_msg> chat_new(past_msg);
     // if the past_msg ends with a newline, we must preserve it in the formatted version

@@ -1932,7 +1910,7 @@ std::string common_chat_format_single(
     };
     // format chat with new_msg
     chat_new.push_back(new_msg);
-    auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja, handler, update_sparams);
+    auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja, inputs, out_params);
     // get the diff part
     ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size());
     return ss.str();
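
Taken together, these hunks decouple the chat-template code from toolcall: common_chat_apply_template no longer reaches into a handler itself, but accepts an optional common_chat_inputs and hands the resulting common_chat_params back through out_params, while the grammar/trigger copying becomes the public common_chat_grammar_to_sampler. A minimal sketch of the new calling convention follows; the updated call sites live in files not shown in this diff, so the handler, tmpl, msgs, vocab, and sparams variables here are assumed to be in scope:

    // Hypothetical caller (the real call sites are among the files not shown above).
    common_chat_inputs inputs;
    inputs.tools       = handler->tool_list();  // plumbing that previously lived
    inputs.tool_choice = "auto";                // inside common_chat_apply_template

    common_chat_params chat_params;
    std::string prompt = common_chat_apply_template(
        tmpl, msgs, /* add_ass */ true, /* use_jinja */ true, &inputs, &chat_params);

    // Grammar and trigger words are now copied into the sampler explicitly:
    common_chat_grammar_to_sampler(&chat_params, vocab, &sparams);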

common/common.h

Lines changed: 11 additions & 11 deletions
@@ -618,6 +618,13 @@ std::string common_detokenize(
        const std::vector<llama_token> & tokens,
        bool special = true);
 
+struct common_chat_params;
+struct common_chat_inputs;
+void common_chat_grammar_to_sampler(const common_chat_params * src,
+                                    const llama_vocab * vocab,
+                                    common_params_sampling * sparams);
+
+
 //
 // Chat template utils
 //

@@ -651,13 +658,6 @@ struct common_chat_templates {
     std::unique_ptr<common_chat_template> template_tool_use;
 };
 
-namespace toolcall {
-    struct sampling_updater {
-        common_params_sampling * sparams;
-        const llama_vocab * vocab;
-    };
-}
-
 // CPP wrapper for llama_chat_apply_template
 // If the built-in template is not supported, we default to chatml
 // If the custom "tmpl" is not supported, we throw an error

@@ -666,8 +666,8 @@ std::string common_chat_apply_template(
        const std::vector<common_chat_msg> & chat,
        bool add_ass,
        bool use_jinja,
-       toolcall::handler::ptr handler = nullptr,
-       toolcall::sampling_updater * update_sparams = nullptr);
+       const common_chat_inputs * inputs = nullptr,
+       common_chat_params * out_params = nullptr);
 
 // Format single message, while taking into account the position of that message in chat history
 std::string common_chat_format_single(

@@ -676,8 +676,8 @@ std::string common_chat_format_single(
        const common_chat_msg & new_msg,
        bool add_ass,
        bool use_jinja,
-       toolcall::handler::ptr handler = nullptr,
-       toolcall::sampling_updater * update_sparams = nullptr);
+       const common_chat_inputs * inputs = nullptr,
+       common_chat_params * out_params = nullptr);
 
 // Returns an example of formatted chat
 std::string common_chat_format_example(
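
Because both new parameters default to nullptr, existing callers compile unchanged; only tool-aware callers opt in by passing them. A brief sketch of the revised common_chat_format_single, which returns only the text contributed by the new message; the tmpl, past_msgs, and inputs variables are assumed to be in scope:

    common_chat_msg new_msg;
    new_msg.role    = "user";
    new_msg.content = "test";

    common_chat_params out_params;
    // delta holds only the suffix appended for new_msg, ready to stream into the prompt
    std::string delta = common_chat_format_single(
        tmpl, past_msgs, new_msg, /* add_ass */ true, /* use_jinja */ true,
        &inputs, &out_params);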

common/toolcall/handler.cpp

Lines changed: 2 additions & 66 deletions
@@ -1,5 +1,7 @@
 
+#include "../json.hpp"
 #include "handler.hpp"
+#include "params.hpp"
 
 #ifdef LLAMA_USE_CURL
 # include "mcp_sse_transport.hpp"

@@ -9,16 +11,6 @@
 
 using json = toolcall::json;
 
-toolcall::params::params(std::string tools, std::string choice) {
-    this->tools(tools);
-    this->choice(choice);
-}
-
-static bool starts_with(const std::string & str, const std::string & prefix) {
-    return str.size() >= prefix.size()
-        && str.compare(0, prefix.size(), prefix) == 0;
-}
-
 std::shared_ptr<toolcall::handler> toolcall::create_handler(const toolcall::params & params) {
     std::shared_ptr<toolcall::handler> result;

@@ -42,62 +34,6 @@ std::shared_ptr<toolcall::handler> toolcall::create_handler(const toolcall::para
     return result;
 }
 
-void toolcall::params::tools(std::string tools) {
-    try {
-
-        if (tools.empty()) {
-            tools_ = std::move(tools);
-
-        } else if (starts_with(tools, "mcp+http")) {
-#ifdef LLAMA_USE_CURL
-            tools_ = std::move(tools);
-#else
-            throw std::invalid_argument(
-                "Model Context Protocol (MCP) only works when llama.cpp is compiled with libcurl");
-#endif
-        } else {
-            tools_ = std::make_shared<json>(json::parse(tools));
-            auto tools_ptr = std::get<std::shared_ptr<json>>(tools_);
-            if (! tools_ptr->is_array()) {
-                throw std::invalid_argument(
-                    "tools must be a URL of the form \"mcp+http(s)://hostname[:port]/\""
-                    ", or a valid JSON array containing tool definitions");
-            }
-        }
-
-    } catch (const json::exception & err) {
-        throw std::invalid_argument(err.what());
-    }
-}
-
-void toolcall::params::choice(std::string choice) {
-    try {
-        if (choice == "auto" || choice == "required" || choice == "none") {
-            tool_choice_ = std::move(choice);
-
-        } else {
-            auto choice_ptr = std::make_shared<json>(json::parse(choice));
-            tool_choice_ = choice_ptr;
-            if (! choice_ptr->is_object()) {
-                throw std::invalid_argument(
-                    "tool choice must be a valid JSON object, \"auto\", \"required\", or \"none\"");
-            }
-        }
-
-    } catch (const json::exception & err) {
-        throw std::invalid_argument(err.what());
-    }
-}
-
-toolcall::params::operator bool() const {
-    if (std::holds_alternative<std::string>(tools_)) {
-        return ! std::get<std::string>(tools_).empty();
-
-    } else {
-        return std::get<toolcall::json_ptr>(tools_) != nullptr;
-    }
-}
-
 json toolcall::handler::tool_list() {
     return impl_->tool_list();
 }
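
The constructor, tools()/choice() setters, and operator bool() removed here are not deleted outright; they move into the new common/toolcall/params.cpp added to the build in CMakeLists.txt above, leaving handler.cpp with only handler construction and dispatch. Assuming the validation logic moved verbatim, construction still behaves as before; the endpoint URL and tool JSON below are illustrative:

    // MCP SSE endpoint: accepted because it starts with "mcp+http" (requires libcurl).
    toolcall::params mcp_params("mcp+https://localhost:8080/", "auto");

    // Inline tool definitions: any JSON array passes the is_array() check.
    toolcall::params json_params(R"([{"name": "get_weather"}])", "required");

    // Anything else throws std::invalid_argument, e.g.:
    // toolcall::params bad("not-json-or-mcp-url");  // would throw

    auto handler = toolcall::create_handler(mcp_params);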

common/toolcall/handler.hpp

Lines changed: 2 additions & 24 deletions
@@ -1,11 +1,11 @@
 #pragma once
 
+#include "../json.hpp"
+#include "params.hpp" // TODO: make foreward decl.
 #include <string>
 #include <variant>
 #include <memory>
 
-#include "../json.hpp"
-
 namespace toolcall
 {
     using json = nlohmann::ordered_json;

@@ -36,28 +36,6 @@
         action last_action_;
     };
 
-    class params {
-    public:
-        params(std::string tools = "", std::string choice = "auto");
-
-        params(const params & other) = default;
-        params(params && other) noexcept = default;
-        params & operator=(const params & other) = default;
-        params & operator=(params && other) noexcept = default;
-
-        operator bool() const;
-
-        void tools(std::string tools);
-        const tools_t tools() const { return tools_; }
-
-        void choice(std::string choice);
-        const tool_choice_t & choice() const { return tool_choice_; }
-
-    private:
-        tools_t tools_;
-        tool_choice_t tool_choice_;
-    };
-
     std::shared_ptr<toolcall::handler> create_handler(const toolcall::params & params);
 
     class handler_impl {
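
The params class removed here is the same one re-imported through the new #include "params.hpp"; the TODO notes that a forward declaration should eventually suffice, since this header only passes const toolcall::params & to create_handler. A sketch of what the relocated header presumably contains, assuming the class moved unchanged (the tools_t and tool_choice_t aliases, omitted here, would have to move with it; the defaulted copy/move members are elided for brevity):

    // common/toolcall/params.hpp (assumed contents, relocated from handler.hpp)
    #pragma once

    #include <string>

    namespace toolcall
    {
        class params {
        public:
            params(std::string tools = "", std::string choice = "auto");

            operator bool() const;  // true when any tools are configured

            void tools(std::string tools);
            const tools_t tools() const { return tools_; }

            void choice(std::string choice);
            const tool_choice_t & choice() const { return tool_choice_; }

        private:
            tools_t tools_;
            tool_choice_t tool_choice_;
        };
    }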
