|
9 | 9 | #include <minja/chat-template.hpp> |
10 | 10 | #include <minja/minja.hpp> |
11 | 11 |
|
| 12 | +#include <algorithm> |
12 | 13 | #include <cstdio> |
| 14 | +#include <cctype> |
13 | 15 | #include <exception> |
| 16 | +#include <functional> |
14 | 17 | #include <iostream> |
15 | 18 | #include <optional> |
16 | 19 | #include <stdexcept> |
@@ -640,6 +643,7 @@ const char * common_chat_format_name(common_chat_format format) { |
640 | 643 | case COMMON_CHAT_FORMAT_SEED_OSS: return "Seed-OSS"; |
641 | 644 | case COMMON_CHAT_FORMAT_NEMOTRON_V2: return "Nemotron V2"; |
642 | 645 | case COMMON_CHAT_FORMAT_APERTUS: return "Apertus"; |
| 646 | + case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: return "LFM2 with JSON tools"; |
643 | 647 | default: |
644 | 648 | throw std::runtime_error("Unknown chat format"); |
645 | 649 | } |
@@ -986,6 +990,126 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat |
986 | 990 | return data; |
987 | 991 | } |
988 | 992 |
|
| 993 | + |
// Case-insensitive substring search.
//
// Characters are compared after std::tolower(), so case folding follows the
// current C locale and works byte-by-byte (no multi-byte awareness).
//
// haystack: string to search in
// needle:   string to look for
// pos:      index in `haystack` at which the search starts
//
// Returns the index of the first match at or after `pos`, or std::string::npos
// when there is no match or when `pos` lies beyond the end of `haystack`.
static size_t ifind_string(const std::string & haystack, const std::string & needle, size_t pos = 0) {
    // advancing begin() past end() is undefined behaviour, so reject it up front
    if (pos > haystack.size()) {
        return std::string::npos;
    }

    const auto first = haystack.begin() + static_cast<std::string::difference_type>(pos);
    const auto it = std::search(
        first, haystack.end(),
        needle.begin(), needle.end(),
        // std::tolower requires a value representable as unsigned char (or EOF);
        // taking the arguments as unsigned char keeps bytes >= 0x80 well-defined
        // on platforms where plain char is signed
        [](unsigned char a, unsigned char b) { return std::tolower(a) == std::tolower(b); }
    );

    return (it == haystack.end()) ? std::string::npos : static_cast<size_t>(it - haystack.begin());
}
| 1003 | + |
| 1004 | +static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl, const struct templates_params & inputs) { |
| 1005 | + common_chat_params data; |
| 1006 | + const auto is_json_schema_provided = !inputs.json_schema.is_null(); |
| 1007 | + const auto is_grammar_provided = !inputs.grammar.empty(); |
| 1008 | + const auto are_tools_provided = inputs.tools.is_array() && !inputs.tools.empty(); |
| 1009 | + |
| 1010 | + // the logic requires potentially modifying the messages |
| 1011 | + auto tweaked_messages = inputs.messages; |
| 1012 | + |
| 1013 | + auto replace_json_schema_marker = [](json & messages) -> bool { |
| 1014 | + static std::string marker1 = "force json schema.\n"; |
| 1015 | + static std::string marker2 = "force json schema."; |
| 1016 | + |
| 1017 | + if (messages.empty() || messages.at(0).at("role") != "system") { |
| 1018 | + return false; |
| 1019 | + } |
| 1020 | + |
| 1021 | + std::string content = messages.at(0).at("content"); |
| 1022 | + |
| 1023 | + for (const auto & marker : {marker1, marker2}) { |
| 1024 | + const auto pos = ifind_string(content, marker); |
| 1025 | + if (pos != std::string::npos) { |
| 1026 | + content.replace(pos, marker.length(), ""); |
| 1027 | + // inject modified content back into the messages |
| 1028 | + messages.at(0).at("content") = content; |
| 1029 | + return true; |
| 1030 | + } |
| 1031 | + } |
| 1032 | + |
| 1033 | + return false; |
| 1034 | + }; |
| 1035 | + |
| 1036 | + // Lfm2 model does not natively work with json, but can generally understand the tools structure |
| 1037 | + // |
| 1038 | + // Example of the pytorch dialog structure: |
| 1039 | + // <|startoftext|><|im_start|>system |
| 1040 | + // List of tools: <|tool_list_start|>[{"name": "get_candidate_status", "description": "Retrieves the current status of a candidate in the recruitment process", "parameters": {"type": "object", "properties": {"candidate_id": {"type": "string", "description": "Unique identifier for the candidate"}}, "required": ["candidate_id"]}}]<|tool_list_end|><|im_end|> |
| 1041 | + // <|im_start|>user |
| 1042 | + // What is the current status of candidate ID 12345?<|im_end|> |
| 1043 | + // <|im_start|>assistant |
| 1044 | + // <|tool_call_start|>[get_candidate_status(candidate_id="12345")]<|tool_call_end|>Checking the current status of candidate ID 12345.<|im_end|> |
| 1045 | + // <|im_start|>tool |
| 1046 | + // <|tool_response_start|>{"candidate_id": "12345", "status": "Interview Scheduled", "position": "Clinical Research Associate", "date": "2023-11-20"}<|tool_response_end|><|im_end|> |
| 1047 | + // <|im_start|>assistant |
| 1048 | + // The candidate with ID 12345 is currently in the "Interview Scheduled" stage for the position of Clinical Research Associate, with an interview date set for 2023-11-20.<|im_end|> |
| 1049 | + // |
| 1050 | + // For the llama server compatibility with json tools semantic, |
| 1051 | + // the client can add "Follow json schema." line into the system message prompt to force the json output. |
| 1052 | + // |
| 1053 | + if (are_tools_provided && (is_json_schema_provided || is_grammar_provided)) { |
| 1054 | + // server/utils.hpp prohibits that branch for the custom grammar anyways |
| 1055 | + throw std::runtime_error("Tools call must not use \"json_schema\" or \"grammar\", use non-tool invocation if you want to use custom grammar"); |
| 1056 | + } else if (are_tools_provided && replace_json_schema_marker(tweaked_messages)) { |
| 1057 | + LOG_INF("%s: Using tools to build a grammar\n", __func__); |
| 1058 | + |
| 1059 | + data.grammar = build_grammar([&](const common_grammar_builder & builder) { |
| 1060 | + auto schemas = json::array(); |
| 1061 | + foreach_function(inputs.tools, [&](const json & tool) { |
| 1062 | + const auto & function = tool.at("function"); |
| 1063 | + schemas.push_back({ |
| 1064 | + {"type", "object"}, |
| 1065 | + {"properties", { |
| 1066 | + {"name", { |
| 1067 | + {"type", "string"}, |
| 1068 | + {"const", function.at("name")}, |
| 1069 | + }}, |
| 1070 | + {"arguments", function.at("parameters")}, |
| 1071 | + }}, |
| 1072 | + {"required", json::array({"name", "arguments", "id"})}, |
| 1073 | + }); |
| 1074 | + }); |
| 1075 | + auto schema = json { |
| 1076 | + {"type", "array"}, |
| 1077 | + {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, |
| 1078 | + {"minItems", 1}, |
| 1079 | + }; |
| 1080 | + if (!inputs.parallel_tool_calls) { |
| 1081 | + schema["maxItems"] = 1; |
| 1082 | + } |
| 1083 | + |
| 1084 | + builder.add_rule("root", "\"<|tool_call_start|>\"" + builder.add_schema("tool_calls", schema) + "\"<|tool_call_end|>\""); |
| 1085 | + }); |
| 1086 | + // model has no concept of tool selection mode choice, |
| 1087 | + // if the system prompt rendered correctly it will produce a tool call |
| 1088 | + // the grammar goes inside the tool call body |
| 1089 | + data.grammar_lazy = true; |
| 1090 | + data.grammar_triggers = {{COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, "\\s*<\\|tool_call_start\\|>\\s*\\["}}; |
| 1091 | + data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; |
| 1092 | + data.format = COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS; |
| 1093 | + } else if (are_tools_provided && (!is_json_schema_provided && !is_grammar_provided)) { |
| 1094 | + LOG_INF("%s: Using tools without json schema or grammar\n", __func__); |
| 1095 | + // output those tokens |
| 1096 | + data.preserved_tokens = {"<|tool_call_start|>", "<|tool_call_end|>"}; |
| 1097 | + } else if (is_json_schema_provided) { |
| 1098 | + LOG_INF("%s: Using provided json schema to build a grammar\n", __func__); |
| 1099 | + data.grammar = json_schema_to_grammar(inputs.json_schema); |
| 1100 | + } else if (is_grammar_provided) { |
| 1101 | + LOG_INF("%s: Using provided grammar\n", __func__); |
| 1102 | + data.grammar = inputs.grammar; |
| 1103 | + } else { |
| 1104 | + LOG_INF("%s: Using content relying on the template\n", __func__); |
| 1105 | + } |
| 1106 | + |
| 1107 | + data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); |
| 1108 | + LOG_DBG("%s: Prompt: %s\n", __func__, data.prompt.c_str()); |
| 1109 | + |
| 1110 | + return data; |
| 1111 | +} |
| 1112 | + |
989 | 1113 | static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) { |
990 | 1114 | common_chat_params data; |
991 | 1115 | data.prompt = apply(tmpl, inputs); |
@@ -2499,6 +2623,71 @@ static void common_chat_parse_apertus(common_chat_msg_parser & builder) { |
2499 | 2623 | builder.add_content(builder.consume_rest()); |
2500 | 2624 | } |
2501 | 2625 |
|
| 2626 | + |
| 2627 | +static void common_chat_parse_lfm2(common_chat_msg_parser & builder) { |
| 2628 | + if (!builder.syntax().parse_tool_calls) { |
| 2629 | + builder.add_content(builder.consume_rest()); |
| 2630 | + return; |
| 2631 | + } |
| 2632 | + |
| 2633 | + // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|> |
| 2634 | + static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>")); |
| 2635 | + static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>")); |
| 2636 | + |
| 2637 | + // Loop through all tool calls |
| 2638 | + while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) { |
| 2639 | + builder.move_to(res->groups[0].end); |
| 2640 | + |
| 2641 | + // Parse JSON array format: [{"name": "...", "arguments": {...}}] |
| 2642 | + auto tool_calls_data = builder.consume_json(); |
| 2643 | + |
| 2644 | + // Consume end marker |
| 2645 | + builder.consume_spaces(); |
| 2646 | + if (!builder.try_consume_regex(tool_call_end_regex)) { |
| 2647 | + throw common_chat_msg_partial_exception("Expected <|tool_call_end|>"); |
| 2648 | + } |
| 2649 | + |
| 2650 | + // Process each tool call in the array |
| 2651 | + if (tool_calls_data.json.is_array()) { |
| 2652 | + for (const auto & tool_call : tool_calls_data.json) { |
| 2653 | + if (!tool_call.is_object()) { |
| 2654 | + throw common_chat_msg_partial_exception("Tool call must be an object"); |
| 2655 | + } |
| 2656 | + |
| 2657 | + if (!tool_call.contains("name")) { |
| 2658 | + throw common_chat_msg_partial_exception("Tool call missing 'name' field"); |
| 2659 | + } |
| 2660 | + |
| 2661 | + std::string function_name = tool_call.at("name"); |
| 2662 | + std::string arguments = "{}"; |
| 2663 | + |
| 2664 | + if (tool_call.contains("arguments")) { |
| 2665 | + if (tool_call.at("arguments").is_object()) { |
| 2666 | + arguments = tool_call.at("arguments").dump(); |
| 2667 | + } else if (tool_call.at("arguments").is_string()) { |
| 2668 | + arguments = tool_call.at("arguments"); |
| 2669 | + } |
| 2670 | + } |
| 2671 | + |
| 2672 | + if (!builder.add_tool_call(function_name, "", arguments)) { |
| 2673 | + throw common_chat_msg_partial_exception("Incomplete tool call"); |
| 2674 | + } |
| 2675 | + } |
| 2676 | + } else { |
| 2677 | + throw common_chat_msg_partial_exception("Expected JSON array for tool calls"); |
| 2678 | + } |
| 2679 | + |
| 2680 | + // Consume any trailing whitespace after this tool call |
| 2681 | + builder.consume_spaces(); |
| 2682 | + } |
| 2683 | + |
| 2684 | + // Consume any remaining content after all tool calls |
| 2685 | + auto remaining = builder.consume_rest(); |
| 2686 | + if (!string_strip(remaining).empty()) { |
| 2687 | + builder.add_content(remaining); |
| 2688 | + } |
| 2689 | +} |
| 2690 | + |
2502 | 2691 | static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) { |
2503 | 2692 | // Parse thinking tags first - this handles the main reasoning content |
2504 | 2693 | builder.try_parse_reasoning("<seed:think>", "</seed:think>"); |
@@ -2748,6 +2937,12 @@ static common_chat_params common_chat_templates_apply_jinja( |
2748 | 2937 | return common_chat_params_init_apertus(tmpl, params); |
2749 | 2938 | } |
2750 | 2939 |
|
| 2940 | + // LFM2 (w/ tools) |
| 2941 | + if (src.find("List of tools: <|tool_list_start|>[") != std::string::npos && |
| 2942 | + src.find("]<|tool_list_end|>") != std::string::npos) { |
| 2943 | + return common_chat_params_init_lfm2(tmpl, params); |
| 2944 | + } |
| 2945 | + |
2751 | 2946 | // Use generic handler when mixing tools + JSON schema. |
2752 | 2947 | // TODO: support that mix in handlers below. |
2753 | 2948 | if ((params.tools.is_array() && params.json_schema.is_object())) { |
@@ -2926,6 +3121,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) { |
2926 | 3121 | case COMMON_CHAT_FORMAT_APERTUS: |
2927 | 3122 | common_chat_parse_apertus(builder); |
2928 | 3123 | break; |
| 3124 | + case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS: |
| 3125 | + common_chat_parse_lfm2(builder); |
| 3126 | + break; |
2929 | 3127 | default: |
2930 | 3128 | throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format)); |
2931 | 3129 | } |
|
0 commit comments