Commit d3830ad

Tool calls support improvements (support null content in messages, correctly handle previous messages that contain tool calls, improvements to the prompt format)
1 parent d25cd7f commit d3830ad

3 files changed, +55 -13 lines changed


common/common.h

Lines changed: 4 additions & 0 deletions
@@ -407,6 +407,10 @@ std::string llama_detokenize(
 struct llama_chat_msg {
     std::string role;
     std::string content;
+    std::string tool_calls;
+
+    llama_chat_msg(std::string role, std::string content, std::string tool_calls = "")
+        : role(role), content(content), tool_calls(tool_calls) {}
 };
 
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
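For context, a minimal sketch (not part of the commit) of how the extended struct can be populated when replaying a conversation; the tool name and values are illustrative:

#include "common.h"
#include <vector>

// Illustrative only: rebuilding a chat where a previous assistant turn
// answered with a tool call instead of text.
static std::vector<llama_chat_msg> make_example_chat() {
    std::vector<llama_chat_msg> chat;
    // The two-argument form still works; tool_calls defaults to "".
    chat.push_back(llama_chat_msg("user", "What is the weather in Paris?"));
    // Assistant turn whose content was empty: the serialized call goes
    // into the new tool_calls field.
    chat.push_back(llama_chat_msg("assistant", "",
        "{\"arguments\": {\"city\": \"Paris\"}, \"name\": \"get_weather\"}"));
    // The tool result fed back to the model on the following turn.
    chat.push_back(llama_chat_msg("tool", "{\"temperature\": 21}"));
    return chat;
}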

examples/server/tool-call.hpp

Lines changed: 15 additions & 9 deletions
@@ -51,11 +51,9 @@ inline std::string format_chat_with_tool(enum llama_tool_format format, const st
     std::stringstream ss;
     auto chat = parse_chat_messages(messages);
     if (format == LLAMA_TOOL_FORMAT_HERMES_3) {
-        ss << "<|im_start|>system\n\n";
-        ss << "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools>\n\n";
-        for (auto tool : tools) {
-            ss << tool.dump(1, '\t') << "\n\n";
-        }
+        ss << "<|im_start|>system\n";
+        ss << "You are a function calling AI model. You are provided with function signatures within <tools></tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions. Here are the available tools: <tools> ";
+        ss << tools.dump() << " ";
         ss << "</tools> Use the following pydantic model json schema for each tool call you will make: {\"properties\": {\"arguments\": {\"title\": \"Arguments\", \"type\": \"object\"}, \"name\": {\"title\": \"Name\", \"type\": \"string\"}}, \"required\": [\"arguments\", \"name\"], \"title\": \"FunctionCall\", \"type\": \"object\"} For each function call return a json object with function name and arguments within <tool_call></tool_call> XML tags as follows:\n";
         ss << "<tool_call>\n";
         ss << "{\"arguments\": <args-dict>, \"name\": <function-name>}\n";
@@ -65,14 +63,22 @@ inline std::string format_chat_with_tool(enum llama_tool_format format, const st
             if (role == "system") {
                 continue; // for optimal performance, we skip user-defined system message
             }
-            ss << "<|im_start|>" << role << "\n\n";
+            ss << "<|im_start|>" << role << "\n";
             if (role == "tool") {
                 ss << "<tool_response>\n" << string_strip(message.content) << "\n</tool_response>\n";
-            } else {
-                ss << string_strip(message.content) << "<|im_end|>\n";
             }
+            else if (role == "user") {
+                ss << string_strip(message.content);
+            }
+            else if (role == "assistant") {
+                ss << string_strip(message.content);
+                if (!message.tool_calls.empty()) {
+                    ss << "<tool_call>\n" << string_strip(message.tool_calls) << "\n</tool_call>";
+                }
+            }
+            ss << "<|im_end|>\n";
         }
-        ss << "<|im_start|>assistant\n\n";
+        ss << "<|im_start|>assistant\n";
     } else {
         throw std::runtime_error("tool_call is not supported by this model");
     }
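To make the template change concrete: assuming the example chat from above (a user question, an assistant turn with empty content and a single hypothetical get_weather tool call, then a tool result), the message loop now renders roughly the following, preceded by the system block from the first hunk:

<|im_start|>user
What is the weather in Paris?<|im_end|>
<|im_start|>assistant
<tool_call>
{"arguments": {"city": "Paris"}, "name": "get_weather"}
</tool_call><|im_end|>
<|im_start|>tool
<tool_response>
{"temperature": 21}
</tool_response>
<|im_end|>
<|im_start|>assistant

Note the doubled newlines after the <|im_start|> headers are gone, and every turn, including tool responses, is now closed with <|im_end|>.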

examples/server/utils.hpp

Lines changed: 36 additions & 4 deletions
@@ -124,24 +124,56 @@ inline std::vector<llama_chat_msg> parse_chat_messages(const std::vector<json> &
 
         std::string role = json_value(curr_msg, "role", std::string(""));
 
+        bool content_found = false;
+        bool valid_content_found = false;
         std::string content;
         if (curr_msg.contains("content")) {
+            content_found = true;
             if (curr_msg["content"].is_string()) {
+                valid_content_found = true;
                 content = curr_msg["content"].get<std::string>();
             } else if (curr_msg["content"].is_array()) {
+                valid_content_found = true;
                 for (const auto & part : curr_msg["content"]) {
                     if (part.contains("text")) {
                         content += "\n" + part["text"].get<std::string>();
                     }
                 }
-            } else {
-                throw std::runtime_error("Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
             }
-        } else {
+        }
+
+        std::string tool_calls;
+        if (role == "assistant") {
+            const std::string tool_calls_field_name = "tool_calls";
+            std::string tool_calls_parsed_text;
+            if (curr_msg.contains(tool_calls_field_name)) {
+                content_found = true;
+                if (curr_msg[tool_calls_field_name].is_array()) {
+                    std::size_t tool_calls_count = curr_msg[tool_calls_field_name].size();
+                    if (tool_calls_count > 1) {
+                        throw std::runtime_error("Parallel tool calls are not supported yet");
+                    }
+                    else if (tool_calls_count == 1) {
+                        valid_content_found = true;
+                        json tool_call_function(curr_msg[tool_calls_field_name][0]["function"]);
+                        if (tool_call_function["arguments"].is_string()) {
+                            tool_calls = "{\"arguments\": " + tool_call_function["arguments"].get<std::string>() +
+                                ", \"name\": \"" + tool_call_function["name"].get<std::string>() + "\"}";
+                        }
+                        else tool_calls = tool_call_function["function"].dump();
+                    }
+                }
+            }
+        }
+
+        if (!content_found) {
             throw std::runtime_error("Missing 'content' (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
         }
+        else if (!valid_content_found) {
+            throw std::runtime_error("Invalid 'content' type (ref: https://github.com/ggerganov/llama.cpp/issues/8367)");
+        }
 
-        chat.push_back({role, content});
+        chat.push_back({role, content, tool_calls});
     }
     return chat;
 }
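Concretely, the parser now accepts an OpenAI-style assistant message like this one (values illustrative), where content is null and the call sits under tool_calls:

{
    "role": "assistant",
    "content": null,
    "tool_calls": [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "arguments": "{\"city\": \"Paris\"}"
            }
        }
    ]
}

Because "content" is present but neither a string nor an array, that branch only sets content_found; the single tool call then marks the message valid and is re-serialized as {"arguments": {"city": "Paris"}, "name": "get_weather"}, the shape the Hermes-3 template wraps in <tool_call> tags. Arrays with more than one entry still throw "Parallel tool calls are not supported yet".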
