| 
9 | 9 | #include <minja/chat-template.hpp>  | 
10 | 10 | #include <minja/minja.hpp>  | 
11 | 11 | 
 
  | 
 | 12 | +#include <algorithm>  | 
12 | 13 | #include <cstdio>  | 
 | 14 | +#include <cctype>  | 
13 | 15 | #include <exception>  | 
 | 16 | +#include <functional>  | 
14 | 17 | #include <iostream>  | 
15 | 18 | #include <optional>  | 
16 | 19 | #include <stdexcept>  | 
@@ -640,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) {  | 
640 | 643 |         case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS";  | 
641 | 644 |         case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2";  | 
642 | 645 |         case COMMON_CHAT_FORMAT_APERTUS: return "Apertus";  | 
 | 646 | +        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools";  | 
643 | 647 |         default:  | 
644 | 648 |             throw std::runtime_error("Unknown chat format");  | 
645 | 649 |     }  | 
@@ -986,6 +990,126 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat  | 
986 | 990 |     return data;  | 
987 | 991 | }  | 
988 | 992 | 
 
  | 
 | 993 | + | 
 | 994 | +// Case-insensitive find  | 
 | 995 | +static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {  | 
 | 996 | +    auto it = std::search(  | 
 | 997 | +        haystack.begin() + pos, haystack.end(),  | 
 | 998 | +        needle.begin(), needle.end(),  | 
 | 999 | +        [](char a, char b) { return std::tolower(a) == std::tolower(b); }  | 
 | 1000 | +    );  | 
 | 1001 | +    return (it == haystack.end()) ? std::string::npos : std::distance(haystack.begin(), it);  | 
 | 1002 | +}  | 
 | 1003 | + | 
 | 1004 | +static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) {  | 
 | 1005 | +    common_chat_params data;  | 
 | 1006 | +    const auto is_json_schema_provided = !inputs.json_schema.is_null();  | 
 | 1007 | +    const auto is_grammar_provided = !inputs.grammar.empty();  | 
 | 1008 | +    const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty();  | 
 | 1009 | + | 
 | 1010 | +    // the logic requires potentially modifying the messages  | 
 | 1011 | +    auto tweaked_messages = inputs.messages;  | 
 | 1012 | + | 
 | 1013 | +    auto replace_json_schema_marker = [](json & messages) -> bool {  | 
 | 1014 | +        static std::string marker1 = "force json schema.\n";  | 
 | 1015 | +        static std::string marker2 = "force json schema.";  | 
 | 1016 | + | 
 | 1017 | +        if (messages.empty() || messages.at(0).at("role") != "system") {  | 
 | 1018 | +            return false;  | 
 | 1019 | +        }  | 
 | 1020 | + | 
 | 1021 | +        std::string content = messages.at(0).at("content");  | 
 | 1022 | + | 
 | 1023 | +        for (const auto & marker : {marker1, marker2}) {  | 
 | 1024 | +            const auto pos = ifind_string(content, marker);  | 
 | 1025 | +            if (pos != std::string::npos) {  | 
 | 1026 | +                content.replace(pos, marker.length(), "");  | 
 | 1027 | +                // inject modified content back into the messages  | 
 | 1028 | +                messages.at(0).at("content") = content;  | 
 | 1029 | +                return true;  | 
 | 1030 | +            }  | 
 | 1031 | +        }  | 
 | 1032 | + | 
 | 1033 | +        return false;  | 
 | 1034 | +    };  | 
 | 1035 | + | 
 | 1036 | +    // Lfm2 model does not natively work with json, but can generally understand the tools structure  | 
 | 1037 | +    //  | 
 | 1038 | +    // Example of the pytorch dialog structure:  | 
 | 1039 | +    //     <|startoftext|><|im_start|>system  | 
 | 1040 | +    //     List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|>  | 
 | 1041 | +    //     <|im_start|>user  | 
 | 1042 | +    //     What is the current status of candidate ID 12345?<|im_end|>  | 
 | 1043 | +    //     <|im_start|>assistant  | 
 | 1044 | +    //     <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|>  | 
 | 1045 | +    //     <|im_start|>tool  | 
 | 1046 | +    //     <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|>  | 
 | 1047 | +    //     <|im_start|>assistant  | 
 | 1048 | +    //     The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|>  | 
 | 1049 | +    //  | 
 | 1050 | +    // For the llama server compatibility with json tools semantic,  | 
 | 1051 | +    // the client can add "Follow json schema." line into the system message prompt to force the json output.  | 
 | 1052 | +    //  | 
 | 1053 | +    if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) {  | 
 | 1054 | +        // server/utils.hpp prohibits that branch for the custom grammar anyways  | 
 | 1055 | +        throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar");  | 
 | 1056 | +    } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) {  | 
 | 1057 | +        LOG_INF("%s: Using tools to build a grammar\n", __func__);  | 
 | 1058 | + | 
 | 1059 | +        data.grammar = build_grammar([&](const common_grammar_builder & builder) {  | 
 | 1060 | +            auto schemas = json::array();  | 
 | 1061 | +            foreach_function(inputs.tools, [&](const json & tool) {  | 
 | 1062 | +                const auto & function = tool.at("function");  | 
 | 1063 | +                schemas.push_back({  | 
 | 1064 | +                    {"type", "object"},  | 
 | 1065 | +                    {"properties", {  | 
 | 1066 | +                        {"name", {  | 
 | 1067 | +                            {"type", "string"},  | 
 | 1068 | +                            {"const", function.at("name")},  | 
 | 1069 | +                        }},  | 
 | 1070 | +                        {"arguments", function.at("parameters")},  | 
 | 1071 | +                    }},  | 
 | 1072 | +                    {"required", json::array({"name", "arguments", "id"})},  | 
 | 1073 | +                });  | 
 | 1074 | +            });  | 
 | 1075 | +            auto schema = json {  | 
 | 1076 | +                {"type", "array"},  | 
 | 1077 | +                {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},  | 
 | 1078 | +                {"minItems", 1},  | 
 | 1079 | +            };  | 
 | 1080 | +            if (!inputs.parallel_tool_calls) {  | 
 | 1081 | +                schema["maxItems"] = 1;  | 
 | 1082 | +            }  | 
 | 1083 | + | 
 | 1084 | +            builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\"");  | 
 | 1085 | +        });  | 
 | 1086 | +        // model has no concept of tool selection mode choice,  | 
 | 1087 | +        // if the system prompt rendered correctly it will produce a tool call  | 
 | 1088 | +        // the grammar goes inside the tool call body  | 
 | 1089 | +        data.grammar_lazy = true;  | 
 | 1090 | +        data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}};  | 
 | 1091 | +        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};  | 
 | 1092 | +        data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS;  | 
 | 1093 | +    } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) {  | 
 | 1094 | +        LOG_INF("%s: Using tools without json schema or grammar\n", __func__);  | 
 | 1095 | +        // output those tokens  | 
 | 1096 | +        data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"};  | 
 | 1097 | +    } else if (is_json_schema_provided) {  | 
 | 1098 | +        LOG_INF("%s: Using provided json schema to build a grammar\n", __func__);  | 
 | 1099 | +        data.grammar = json_schema_to_grammar(inputs.json_schema);  | 
 | 1100 | +    } else if (is_grammar_provided) {  | 
 | 1101 | +        LOG_INF("%s: Using provided grammar\n", __func__);  | 
 | 1102 | +        data.grammar = inputs.grammar;  | 
 | 1103 | +    } else {  | 
 | 1104 | +        LOG_INF("%s: Using content relying on the template\n", __func__);  | 
 | 1105 | +    }  | 
 | 1106 | + | 
 | 1107 | +    data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages);  | 
 | 1108 | +    LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str());  | 
 | 1109 | + | 
 | 1110 | +    return data;  | 
 | 1111 | +}  | 
 | 1112 | + | 
989 | 1113 | static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {  | 
990 | 1114 |     common_chat_params data;  | 
991 | 1115 |     data.prompt = apply(tmpl, inputs);  | 
@@ -2499,6 +2623,71 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) {  | 
2499 | 2623 |     builder.add_content(builder.consume_rest());  | 
2500 | 2624 | }  | 
2501 | 2625 | 
 
  | 
 | 2626 | + | 
 | 2627 | +static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {  | 
 | 2628 | +    if (!builder.syntax().parse_tool_calls) {  | 
 | 2629 | +        builder.add_content(builder.consume_rest());  | 
 | 2630 | +        return;  | 
 | 2631 | +    }  | 
 | 2632 | + | 
 | 2633 | +    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>  | 
 | 2634 | +    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));  | 
 | 2635 | +    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));  | 
 | 2636 | + | 
 | 2637 | +    // Loop through all tool calls  | 
 | 2638 | +    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {  | 
 | 2639 | +        builder.move_to(res->groups[0].end);  | 
 | 2640 | + | 
 | 2641 | +        // Parse JSON array format: [{"name": "...", "arguments": {...}}]  | 
 | 2642 | +        auto tool_calls_data = builder.consume_json();  | 
 | 2643 | + | 
 | 2644 | +        // Consume end marker  | 
 | 2645 | +        builder.consume_spaces();  | 
 | 2646 | +        if (!builder.try_consume_regex(tool_call_end_regex)) {  | 
 | 2647 | +            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");  | 
 | 2648 | +        }  | 
 | 2649 | + | 
 | 2650 | +        // Process each tool call in the array  | 
 | 2651 | +        if (tool_calls_data.json.is_array()) {  | 
 | 2652 | +            for (const auto & tool_call : tool_calls_data.json) {  | 
 | 2653 | +                if (!tool_call.is_object()) {  | 
 | 2654 | +                    throw common_chat_msg_partial_exception("Tool call must be an object");  | 
 | 2655 | +                }  | 
 | 2656 | + | 
 | 2657 | +                if (!tool_call.contains("name")) {  | 
 | 2658 | +                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");  | 
 | 2659 | +                }  | 
 | 2660 | + | 
 | 2661 | +                std::string function_name = tool_call.at("name");  | 
 | 2662 | +                std::string arguments = "{}";  | 
 | 2663 | + | 
 | 2664 | +                if (tool_call.contains("arguments")) {  | 
 | 2665 | +                    if (tool_call.at("arguments").is_object()) {  | 
 | 2666 | +                        arguments = tool_call.at("arguments").dump();  | 
 | 2667 | +                    } else if (tool_call.at("arguments").is_string()) {  | 
 | 2668 | +                        arguments = tool_call.at("arguments");  | 
 | 2669 | +                    }  | 
 | 2670 | +                }  | 
 | 2671 | + | 
 | 2672 | +                if (!builder.add_tool_call(function_name, "", arguments)) {  | 
 | 2673 | +                    throw common_chat_msg_partial_exception("Incomplete tool call");  | 
 | 2674 | +                }  | 
 | 2675 | +            }  | 
 | 2676 | +        } else {  | 
 | 2677 | +            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");  | 
 | 2678 | +        }  | 
 | 2679 | + | 
 | 2680 | +        // Consume any trailing whitespace after this tool call  | 
 | 2681 | +        builder.consume_spaces();  | 
 | 2682 | +    }  | 
 | 2683 | + | 
 | 2684 | +    // Consume any remaining content after all tool calls  | 
 | 2685 | +    auto remaining = builder.consume_rest();  | 
 | 2686 | +    if (!string_strip(remaining).empty()) {  | 
 | 2687 | +        builder.add_content(remaining);  | 
 | 2688 | +    }  | 
 | 2689 | +}  | 
 | 2690 | + | 
2502 | 2691 | static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {  | 
2503 | 2692 |     // Parse thinking tags first - this handles the main reasoning content  | 
2504 | 2693 |     builder.try_parse_reasoning("<seed:think>", "</seed:think>");  | 
@@ -2748,6 +2937,12 @@ static common_chat_params common_chat_templates_apply_jinja(  | 
2748 | 2937 |         return common_chat_params_init_apertus(tmpl, params);  | 
2749 | 2938 |     }  | 
2750 | 2939 | 
 
  | 
 | 2940 | +    // LFM2 (w/ tools)  | 
 | 2941 | +    if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos &&  | 
 | 2942 | +        src.find("]<|tool_list_end|>") != std::string::npos) {  | 
 | 2943 | +        return common_chat_params_init_lfm2(tmpl, params);  | 
 | 2944 | +    }  | 
 | 2945 | + | 
2751 | 2946 |     // Use generic handler when mixing tools + JSON schema.  | 
2752 | 2947 |     // TODO: support that mix in handlers below.  | 
2753 | 2948 |     if ((params.tools.is_array() && params.json_schema.is_object())) {  | 
@@ -2926,6 +3121,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {  | 
2926 | 3121 |         case COMMON_CHAT_FORMAT_APERTUS:  | 
2927 | 3122 |             common_chat_parse_apertus(builder);  | 
2928 | 3123 |             break;  | 
 | 3124 | +        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:  | 
 | 3125 | +            common_chat_parse_lfm2(builder);  | 
 | 3126 | +            break;  | 
2929 | 3127 |         default:  | 
2930 | 3128 |             throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));  | 
2931 | 3129 |     }  | 
 | 
0 commit comments