@@ -163,6 +163,19 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
163163 throw std::runtime_error (" Invalid tool_choice: " + tool_choice);
164164}
165165
// Reports whether toggling `enable_thinking` changes the prompt rendered by the given templates.
//
// A throwaway conversation holding one message is rendered twice through
// common_chat_templates_apply(): once with enable_thinking = false and once with
// enable_thinking = true. The result is true when the two rendered prompts differ.
//
// @param chat_templates  templates handle, forwarded unchanged to common_chat_templates_apply()
// @return                true if the two rendered prompt strings are not equal
//
// NOTE(review): nothing here catches errors raised by common_chat_templates_apply();
// whatever it throws propagates to the caller.
166+ bool common_chat_templates_support_enable_thinking (const common_chat_templates * chat_templates) {
167+ common_chat_templates_inputs dummy_inputs;
168+ common_chat_msg msg;
    // NOTE(review): the role/content literals below begin with a space as written here;
    // confirm that is intended, since the role string is handed to the template as-is.
169+ msg.role = " user" ;
170+ msg.content = " test" ;
171+ dummy_inputs.messages = {msg};
    // First render: thinking disabled.
172+ dummy_inputs.enable_thinking = false ;
173+ const auto rendered_no_thinking = common_chat_templates_apply (chat_templates, dummy_inputs);
    // Second render: same inputs, only the thinking flag flipped.
174+ dummy_inputs.enable_thinking = true ;
175+ const auto rendered_with_thinking = common_chat_templates_apply (chat_templates, dummy_inputs);
    // Any difference in the prompt text is taken as evidence that the flag is honoured.
176+ return rendered_no_thinking.prompt != rendered_with_thinking.prompt ;
177+ }
178+
166179template <>
167180std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat (const json & messages) {
168181 std::vector<common_chat_msg> msgs;
@@ -618,6 +631,7 @@ const char * common_chat_format_name(common_chat_format format) {
618631 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return " FireFunction v2" ;
619632 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return " Functionary v3.2" ;
620633 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
634+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return " DeepSeek V3.1" ;
621635 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
622636 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
623637 case COMMON_CHAT_FORMAT_GRANITE: return " Granite" ;
@@ -685,11 +699,13 @@ static void parse_json_tool_calls(
685699 size_t from = std::string::npos;
686700 auto first = true ;
687701 while (true ) {
702+ auto start_pos = builder.pos ();
688703 auto res = function_regex_start_only && first
689704 ? builder.try_consume_regex (*function_regex_start_only)
690705 : function_regex
691706 ? builder.try_find_regex (*function_regex, from)
692707 : std::nullopt ;
708+
693709 if (res) {
694710 std::string name;
695711 if (get_function_name) {
@@ -724,6 +740,8 @@ static void parse_json_tool_calls(
724740 return ;
725741 }
726742 throw common_chat_msg_partial_exception (" incomplete tool call" );
743+ } else {
744+ builder.move_to (start_pos);
727745 }
728746 break ;
729747 }
@@ -1375,6 +1393,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
13751393 }
13761394 return data;
13771395}
1396+
// Builds the chat params (prompt, format, grammar, triggers, preserved tokens) for the
// DeepSeek V3.1 chat format.
//
// Steps, in order:
//   1. Render the prompt via apply(), passing the caller's messages and an extra
//      template context entry carrying inputs.enable_thinking.
//   2. Tag the result with COMMON_CHAT_FORMAT_DEEPSEEK_V3_1.
//   3. If the rendered prompt ends with the think-open literal, either close it right
//      away (thinking disabled) or record thinking_forced_open (thinking enabled).
//   4. Only when inputs.tools is a non-empty array: build a tool-call grammar, register
//      one full-pattern grammar trigger, and set the preserved token list.
//
// @param tmpl    chat template to render
// @param inputs  request parameters (messages, tools, tool_choice, json_schema, ...)
// @return        the populated common_chat_params
1397+ static common_chat_params common_chat_params_init_deepseek_v3_1 (const common_chat_template & tmpl, const struct templates_params & inputs) {
1398+ common_chat_params data;
1399+
1400+ // Pass thinking context for DeepSeek V3.1 template
1401+ json additional_context = {
1402+ {" thinking" , inputs.enable_thinking },
1403+ };
1404+
1405+ auto prompt = apply (tmpl, inputs,
1406+ /* messages_override= */ inputs.messages ,
1407+ /* tools_override= */ std::nullopt ,
1408+ additional_context);
1409+ data.prompt = prompt;
1410+ data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
    // Prompt ends with an opened think block: close it immediately when thinking is
    // disabled, otherwise remember that generation starts inside the think block.
1411+ if (string_ends_with (data.prompt , " <think>" )) {
1412+ if (!inputs.enable_thinking ) {
1413+ data.prompt += " </think>" ;
1414+ } else {
1415+ data.thinking_forced_open = true ;
1416+ }
1417+ }
    // Grammar, trigger and preserved tokens are only set up when tools were supplied.
1418+ if (inputs.tools .is_array () && !inputs.tools .empty ()) {
    // Lazy grammar unless tool_choice is REQUIRED or a json_schema was given.
1419+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema .is_null ();
1420+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1421+ std::vector<std::string> tool_rules;
    // One "<name>-call" rule per tool: optional call-begin marker, the tool name and
    // separator, the arguments constrained by the tool's parameter schema, call-end marker.
1422+ foreach_function (inputs.tools , [&](const json & tool) {
1423+ const auto & function = tool.at (" function" );
1424+ std::string name = function.at (" name" );
    // Taken by value before resolve_refs(); presumably resolve_refs rewrites its
    // argument in place - confirm against common_grammar_builder.
1425+ auto parameters = function.at (" parameters" );
1426+ builder.resolve_refs (parameters);
1427+ tool_rules.push_back (builder.add_rule (name + " -call" ,
1428+ " ( \" <|tool▁call▁begin|>\" )? \" " + name + " <|tool▁sep|>"
1429+ " \" " + builder.add_schema (name + " -args" , parameters) + " "
1430+ " \" <|tool▁call▁end|>\" " ));
1431+ });
1432+ // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1433+ // so we accept common variants (then it's all constrained)
    // NOTE(review): the remark above names Distill Qwen models; confirm it is still
    // relevant for the V3.1 format handled here.
    // Root rule: optional think-close (only when thinking was forced open), one of the
    // accepted calls-begin spellings, the tool rules (repeated when parallel_tool_calls
    // is set), then the calls-end marker.
1434+ builder.add_rule (" root" ,
1435+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1436+ " ( \" <|tool▁calls▁begin|>\" | \" <|tool_calls_begin|>\" | \" <|tool calls begin|>\" | \" <|tool\\\\ _calls\\\\ _begin|>\" | \" <|tool▁calls|>\" ) "
1437+ " (" + string_join (tool_rules, " | " ) + " )" + (inputs.parallel_tool_calls ? " *" : " " ) + " "
1438+ " \" <|tool▁calls▁end|>\" "
1439+ " space" );
1440+ data.grammar_triggers .push_back ({
1441+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1442+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1443+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1444+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) +
1445+ " (<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)[\\ s\\ S]*"
1446+ });
    // Assigned from inside the grammar-building callback, so it is only set on the
    // tools path. NOTE(review): the list holds only the primary spelling of each
    // marker, not the alternative calls-begin variants accepted above.
1447+ data.preserved_tokens = {
1448+ " <think>" ,
1449+ " </think>" ,
1450+ " <|tool▁calls▁begin|>" ,
1451+ " <|tool▁call▁begin|>" ,
1452+ " <|tool▁sep|>" ,
1453+ " <|tool▁call▁end|>" ,
1454+ " <|tool▁calls▁end|>" ,
1455+ };
1456+ });
1457+ }
1458+ return data;
1459+ }
1460+
13781461static void common_chat_parse_deepseek_r1 (common_chat_msg_parser & builder) {
13791462 builder.try_parse_reasoning (" <think>" , " </think>" );
13801463 if (!builder.syntax ().parse_tool_calls ) {
@@ -1396,6 +1479,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
13961479 tool_calls_end);
13971480}
13981481
// Parses the non-reasoning part of a DeepSeek V3.1 response from the builder's current position.
//
// When builder.syntax().parse_tool_calls is false, everything left in the input is added
// as plain content. Otherwise the work is delegated to parse_json_tool_calls() with the
// V3.1 marker regexes defined below.
//
// @param builder  message parser holding the input and receiving the parsed output
1482+ static void common_chat_parse_deepseek_v3_1_content (common_chat_msg_parser & builder) {
    // Function-local statics: each regex object is constructed on first use only.
    // function_regex: optional call-begin marker, captured function name, then the separator marker.
    // NOTE(review): as written, the negated class in the name capture reads backslash-space,
    // 'n' and '<' rather than a newline escape; confirm the intended escape sequence.
    // NOTE(review): if the bars inside the markers are plain ASCII '|', the regex engine
    // treats them as alternation; confirm the exact marker characters.
1483+ static const common_regex function_regex (" (?:<|tool▁call▁begin|>)?([^\\ n<]+)(?:<|tool▁sep|>)" );
1484+
    // close_regex: optional whitespace followed by the call-end marker.
1485+ static const common_regex close_regex (" (?:[\\ s]*)?<|tool▁call▁end|>" );
    // tool_calls_begin: any of the accepted spellings of the calls-begin marker.
1486+ static const common_regex tool_calls_begin (" (?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)" );
1487+ static const common_regex tool_calls_end (" <|tool▁calls▁end|>" );
1488+
    // Tool-call parsing disabled: pass the remainder through verbatim as content.
1489+ if (!builder.syntax ().parse_tool_calls ) {
1490+ LOG_DBG (" %s: not parse_tool_calls\n " , __func__);
1491+ builder.add_content (builder.consume_rest ());
1492+ return ;
1493+ }
1494+
1495+ LOG_DBG (" %s: parse_tool_calls\n " , __func__);
1496+
    // No start-only function regex is supplied (std::nullopt), so the shared helper
    // locates each call through function_regex.
1497+ parse_json_tool_calls (
1498+ builder,
1499+ /* block_open= */ tool_calls_begin,
1500+ /* function_regex_start_only= */ std::nullopt ,
1501+ function_regex,
1502+ close_regex,
1503+ tool_calls_end);
1504+ }
1505+
// Entry point for parsing a DeepSeek V3.1 response: splits reasoning from content and
// hands the content part to common_chat_parse_deepseek_v3_1_content().
//
// Decision order:
//   1. thinking forced open, input not partial, and no think-close literal anywhere
//      ahead -> the whole input is parsed as content (tool calls included).
//   2. try_parse_reasoning() succeeds -> whatever follows is parsed as content.
//   3. otherwise:
//        a. reasoning_format is NONE   -> parse as content.
//        b. thinking is forced open    -> the rest of the input becomes reasoning content.
//        c. else                       -> parse as content.
//
// @param builder  message parser holding the input and receiving the parsed output
1506+ static void common_chat_parse_deepseek_v3_1 (common_chat_msg_parser & builder) {
1507+ // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1508+ // A non-consuming probe for the closing tag runs first; the standard reasoning parser is tried afterwards
    // std::to_string on a bool yields the text 0 or 1 in this log line.
1509+ LOG_DBG (" %s: thinking_forced_open: %s\n " , __func__, std::to_string (builder.syntax ().thinking_forced_open ).c_str ());
1510+
    // Look-ahead only: remember the position, search for the closing tag, then rewind
    // so that the probe itself consumes no input.
1511+ auto start_pos = builder.pos ();
1512+ auto found_end_think = builder.try_find_literal (" </think>" );
1513+ builder.move_to (start_pos);
1514+
    // Complete (non-partial) output with thinking forced open but no closing tag at all:
    // handled as content instead of being classified as reasoning.
1515+ if (builder.syntax ().thinking_forced_open && !builder.is_partial () && !found_end_think) {
1516+ LOG_DBG (" %s: no end_think, not partial, adding content\n " , __func__);
1517+ common_chat_parse_deepseek_v3_1_content (builder);
1518+ } else if (builder.try_parse_reasoning (" <think>" , " </think>" )) {
1519+ // If reasoning was parsed successfully, the remaining content is regular content
1520+ LOG_DBG (" %s: parsed reasoning, adding content\n " , __func__);
1521+ // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1522+ common_chat_parse_deepseek_v3_1_content (builder);
1523+ } else {
    // Reasoning extraction is switched off: everything goes through the content parser.
1524+ if (builder.syntax ().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1525+ LOG_DBG (" %s: reasoning_format none, adding content\n " , __func__);
1526+ common_chat_parse_deepseek_v3_1_content (builder);
1527+ return ;
1528+ }
1529+ // If no reasoning tags found, check if we should treat everything as reasoning
1530+ if (builder.syntax ().thinking_forced_open ) {
1531+ // If thinking is forced open but no tags found, treat everything as reasoning
1532+ LOG_DBG (" %s: thinking_forced_open, adding reasoning content\n " , __func__);
1533+ builder.add_reasoning_content (builder.consume_rest ());
1534+ } else {
1535+ LOG_DBG (" %s: no thinking_forced_open, adding content\n " , __func__);
1536+ // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1537+ common_chat_parse_deepseek_v3_1_content (builder);
1538+ }
1539+ }
1540+ }
1541+
13991542static common_chat_params common_chat_params_init_gpt_oss (const common_chat_template & tmpl, const struct templates_params & inputs) {
14001543 common_chat_params data;
14011544 auto prompt = apply (tmpl, inputs);
@@ -2352,6 +2495,12 @@ static common_chat_params common_chat_templates_apply_jinja(
23522495 }
23532496 }
23542497
2498+ // DeepSeek V3.1: detect based on specific patterns in the template
2499+ if (src.find (" message['prefix'] is defined and message['prefix'] and thinking" ) != std::string::npos &&
2500+ params.json_schema .is_null ()) {
2501+ return common_chat_params_init_deepseek_v3_1 (tmpl, params);
2502+ }
2503+
23552504 // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
23562505 if (src.find (" <|tool▁calls▁begin|>" ) != std::string::npos && params.json_schema .is_null ()) {
23572506 return common_chat_params_init_deepseek_r1 (tmpl, params);
@@ -2524,6 +2673,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
25242673 case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
25252674 common_chat_parse_deepseek_r1 (builder);
25262675 break ;
2676+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
2677+ common_chat_parse_deepseek_v3_1 (builder);
2678+ break ;
25272679 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
25282680 common_chat_parse_functionary_v3_2 (builder);
25292681 break ;
0 commit comments