@@ -143,39 +143,37 @@ class chat_formatter {
 };
 
 #ifdef LLAMA_USE_TOOLCALL
-static bool call_tool(common_params & params, const std::string & assistant_msg,
-                      llama_context * ctx, toolcall::client::ptr tc_client, std::vector<llama_token> & embd_inp)
+static bool call_tool(const std::string & assistant_msg, llama_context * ctx, toolcall::client::ptr tc_client, std::vector<llama_token> & embd_inp)
 {
-    auto should_use_toolcall = [&params, tc_client](const std::string & asst_msg) {
-        if (!params.use_jinja || tc_client == nullptr) {
-            return false;
-        }
-        try {
-            nlohmann::json j = nlohmann::json::parse(asst_msg);
-            return (j.contains("name") && j.contains("parameters"));
-
-        } catch (const nlohmann::json::exception & err) {
-            return false;
-        }
-    };
-
-    if (should_use_toolcall(assistant_msg)) {
-        toolcall::result_set res = tc_client->call(assistant_msg);
-        if (!res.empty()) {
-            std::string toolcall_result_str;
-            for (const auto & r : res) {
-                toolcall_result_str += ("\n" + r.data); // Although more complex results can be
-                                                        // returned (resources, images, etc.),
-                                                        // for now simply append the data. Later
-                                                        // on support for specific models may
-                                                        // allow for unpacking Base64 data.
+    bool tool_was_called = false;
+    common_chat_msg msg = common_chat_parse(assistant_msg, COMMON_CHAT_FORMAT_GENERIC);
+    if (!msg.tool_calls.empty()) {
+        for (const auto & tc : msg.tool_calls) {
+            nlohmann::json tc_oai_json {
+                {"type", "function"},
+                {"function", {
+                    {"name", tc.name},
+                    {"arguments", tc.arguments},
+                }},
+                {"id", tc.id},
+            };
+            toolcall::result_set res = tc_client->call(tc_oai_json);
+            if (!res.empty()) {
+                std::string toolcall_result_str;
+                for (const auto & r : res) {
+                    toolcall_result_str += ("\n" + r.data); // Although more complex results can be
+                                                            // returned (resources, images, etc.),
+                                                            // for now simply append the data. Later
+                                                            // on support for specific models may
+                                                            // allow for unpacking Base64 data.
+                }
+                auto toolcall_result_tok = common_tokenize(ctx, toolcall_result_str, false, true);
+                embd_inp.insert(embd_inp.end(), toolcall_result_tok.begin(), toolcall_result_tok.end());
             }
-            auto toolcall_result_tok = common_tokenize(ctx, toolcall_result_str, false, true);
-            embd_inp.insert(embd_inp.end(), toolcall_result_tok.begin(), toolcall_result_tok.end());
+            tool_was_called = true;
         }
-        return true;
     }
-    return false;
+    return tool_was_called;
 }
 #endif
 
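Note: for reference, a minimal sketch of the OAI-style envelope that call_tool now builds per parsed tool call before handing it to tc_client->call(); the free-standing helper make_oai_toolcall and the example values are hypothetical, the diff constructs the same JSON inline:

    // Hypothetical helper mirroring the tc_oai_json construction above.
    #include <nlohmann/json.hpp>
    #include <string>

    static nlohmann::json make_oai_toolcall(const std::string & name,
                                            const std::string & arguments,
                                            const std::string & id) {
        return {
            {"type", "function"},
            {"function", {
                {"name", name},           // tool/function to invoke
                {"arguments", arguments}, // arguments as a JSON-encoded string
            }},
            {"id", id},                   // correlates the result with this call
        };
    }

    // make_oai_toolcall("get_weather", "{\"city\":\"Oslo\"}", "call_0").dump() yields
    // (nlohmann::json orders keys alphabetically by default):
    // {"function":{"arguments":"{\"city\":\"Oslo\"}","name":"get_weather"},"id":"call_0","type":"function"}

This also replaces the old hand-rolled check for top-level "name"/"parameters" keys with common_chat_parse using the generic chat format, so detection and argument extraction share one code path.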
@@ -923,13 +921,8 @@ int main(int argc, char ** argv) {
 
             if (params.enable_chat_template) {
                 chat_add_and_format("assistant", assistant_ss.str(), true);
-#ifdef LLAMA_USE_TOOLCALL
-                is_interacting = !call_tool(params, assistant_ss.str(), ctx, tc_client, embd_inp);
-                LOG("\n");
-#else
                 is_interacting = true;
                 LOG("\n");
-#endif
             }
         }
     }
@@ -945,6 +938,16 @@ int main(int argc, char ** argv) {
             }
         }
 
+#ifdef LLAMA_USE_TOOLCALL
+        if ((tc_client && n_past > 0) && (waiting_for_first_input || is_interacting)) {
+            size_t last_len = embd_inp.size();
+            bool was_toolcall = call_tool(assistant_ss.str(), ctx, tc_client, embd_inp);
+            if (was_toolcall && last_len < embd_inp.size()) {
+                LOG("%s", common_token_to_piece(ctx, embd_inp[last_len]).c_str());
+            }
+        }
+#endif
+
         if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
             LOG_DBG("waiting for user input\n");
 
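For clarity, the gate added above as a standalone predicate (hypothetical helper; the diff writes the condition inline against main()'s locals):

    // Hypothetical predicate equivalent to the inline condition above: probe
    // for tool calls only when a toolcall client is configured, at least one
    // token has been evaluated, and generation has paused for input.
    static bool should_probe_toolcall(const toolcall::client::ptr & tc_client, int n_past,
                                      bool waiting_for_first_input, bool is_interacting) {
        return (tc_client && n_past > 0) && (waiting_for_first_input || is_interacting);
    }

Running the check here, just before waiting for user input, lets a single call site cover both the interactive and single-turn paths, which is what allows the two earlier #ifdef LLAMA_USE_TOOLCALL blocks to be removed.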
@@ -1049,16 +1052,6 @@ int main(int argc, char ** argv) {
             }
         }
 
-#ifdef LLAMA_USE_TOOLCALL
-        if (params.single_turn) {
-            size_t last_len = embd_inp.size();
-            bool was_toolcall = call_tool(params, assistant_ss.str(), ctx, tc_client, embd_inp);
-            if (was_toolcall && last_len < embd_inp.size()) {
-                LOG("%s", common_token_to_piece(ctx, embd_inp[last_len]).c_str());
-            }
-        }
-#endif
-
         // end of generation
         if (!embd.empty() && llama_vocab_is_eog(vocab, embd.back()) && !(params.interactive)) {
             LOG("[end of text]\n");