@@ -631,6 +631,7 @@ const char * common_chat_format_name(common_chat_format format) {
631631 case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return " FireFunction v2" ;
632632 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return " Functionary v3.2" ;
633633 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return " Functionary v3.1 Llama 3.1" ;
634+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return " DeepSeek V3.1" ;
634635 case COMMON_CHAT_FORMAT_HERMES_2_PRO: return " Hermes 2 Pro" ;
635636 case COMMON_CHAT_FORMAT_COMMAND_R7B: return " Command R7B" ;
636637 case COMMON_CHAT_FORMAT_GRANITE: return " Granite" ;
@@ -698,11 +699,13 @@ static void parse_json_tool_calls(
698699 size_t from = std::string::npos;
699700 auto first = true ;
700701 while (true ) {
702+ auto start_pos = builder.pos ();
701703 auto res = function_regex_start_only && first
702704 ? builder.try_consume_regex (*function_regex_start_only)
703705 : function_regex
704706 ? builder.try_find_regex (*function_regex, from)
705707 : std::nullopt ;
708+
706709 if (res) {
707710 std::string name;
708711 if (get_function_name) {
@@ -737,6 +740,8 @@ static void parse_json_tool_calls(
737740 return ;
738741 }
739742 throw common_chat_msg_partial_exception (" incomplete tool call" );
743+ } else {
744+ builder.move_to (start_pos);
740745 }
741746 break ;
742747 }
@@ -1388,6 +1393,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
13881393 }
13891394 return data;
13901395}
1396+
// Builds the chat parameters (prompt, format, optional tool-call grammar) for the
// DeepSeek V3.1 chat format.
//
// tmpl   - chat template that is rendered through apply().
// inputs - request parameters; this function reads messages, enable_thinking, tools,
//          tool_choice, json_schema and parallel_tool_calls from it.
//
// Returns a common_chat_params whose format is COMMON_CHAT_FORMAT_DEEPSEEK_V3_1. The
// grammar, grammar triggers and preserved tokens are only filled in when inputs.tools
// is a non-empty array.
1397+ static common_chat_params common_chat_params_init_deepseek_v3_1 (const common_chat_template & tmpl, const struct templates_params & inputs) {
1398+ common_chat_params data;
1399+
1400+ // Pass thinking context for DeepSeek V3.1 template
1401+ json additional_context = {
1402+ {" thinking" , inputs.enable_thinking },
1403+ };
1404+
// Render the prompt with the caller's messages, no tools override, and the extra
// "thinking" context entry built above.
1405+ auto prompt = apply (tmpl, inputs,
1406+ /* messages_override= */ inputs.messages ,
1407+ /* tools_override= */ std::nullopt ,
1408+ additional_context);
1409+ data.prompt = prompt;
1410+ data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
// When the rendered prompt ends with the opening think tag: if thinking is disabled,
// append the closing tag to the prompt; otherwise record that generation starts inside
// an already-open reasoning block.
1411+ if (string_ends_with (data.prompt , " <think>" )) {
1412+ if (!inputs.enable_thinking ) {
1413+ data.prompt += " </think>" ;
1414+ } else {
1415+ data.thinking_forced_open = true ;
1416+ }
1417+ }
// Tool-call grammar is built only for a non-empty tools array.
1418+ if (inputs.tools .is_array () && !inputs.tools .empty ()) {
// The grammar is lazy unless the tool choice is REQUIRED or a JSON schema is set.
1419+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema .is_null ();
1420+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1421+ std::vector<std::string> tool_rules;
// One "<name>-call" rule per function: optional call-begin tag, the function name
// followed by the separator tag, the arguments constrained by the function's
// "parameters" schema, then the call-end tag.
1422+ foreach_function (inputs.tools , [&](const json & tool) {
1423+ const auto & function = tool.at (" function" );
1424+ std::string name = function.at (" name" );
1425+ auto parameters = function.at (" parameters" );
1426+ builder.resolve_refs (parameters);
1427+ tool_rules.push_back (builder.add_rule (name + " -call" ,
1428+ " ( \" <|tool▁call▁begin|>\" )? \" " + name + " <|tool▁sep|>"
1429+ " \" " + builder.add_schema (name + " -args" , parameters) + " "
1430+ " \" <|tool▁call▁end|>\" " ));
1431+ });
// NOTE(review): the remark below refers to "Distill Qwen" models; confirm it is
// still accurate for DeepSeek V3.1 or whether it was carried over from another handler.
1432+ // Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
1433+ // so we accept common variants (then it's all constrained)
// Root rule: an optional closing think tag (only when thinking_forced_open), one of
// the accepted tool-calls-begin spellings, the alternation of per-tool rules (with a
// "*" repetition suffix when parallel_tool_calls is set), the tool-calls-end tag,
// and trailing space.
1434+ builder.add_rule (" root" ,
1435+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1436+ " ( \" <|tool▁calls▁begin|>\" | \" <|tool_calls_begin|>\" | \" <|tool calls begin|>\" | \" <|tool\\\\ _calls\\\\ _begin|>\" | \" <|tool▁calls|>\" ) "
1437+ " (" + string_join (tool_rules, " | " ) + " )" + (inputs.parallel_tool_calls ? " *" : " " ) + " "
1438+ " \" <|tool▁calls▁end|>\" "
1439+ " space" );
// A single full-pattern trigger is registered from inside the grammar callback; its
// prefix differs depending on thinking_forced_open (see the inline remarks below).
1440+ data.grammar_triggers .push_back ({
1441+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1442+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1443+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1444+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) +
1445+ " (<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)[\\ s\\ S]*"
1446+ });
// Think tags and tool-call tags listed as preserved tokens (also assigned inside
// the grammar callback).
1447+ data.preserved_tokens = {
1448+ " <think>" ,
1449+ " </think>" ,
1450+ " <|tool▁calls▁begin|>" ,
1451+ " <|tool▁call▁begin|>" ,
1452+ " <|tool▁sep|>" ,
1453+ " <|tool▁call▁end|>" ,
1454+ " <|tool▁calls▁end|>" ,
1455+ };
1456+ });
1457+ }
1458+ return data;
1459+ }
1460+
13911461static void common_chat_parse_deepseek_r1 (common_chat_msg_parser & builder) {
13921462 builder.try_parse_reasoning (" <think>" , " </think>" );
13931463 if (!builder.syntax ().parse_tool_calls ) {
@@ -1409,6 +1479,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
14091479 tool_calls_end);
14101480}
14111481
// Parses the non-reasoning part of a DeepSeek V3.1 response from the builder's current
// position.
//
// builder - message parser; its syntax().parse_tool_calls flag selects the behaviour.
//
// When tool-call parsing is disabled, everything that remains is added as plain content.
// Otherwise the work is delegated to parse_json_tool_calls() with the regexes below.
1482+ static void common_chat_parse_deepseek_v3_1_content (common_chat_msg_parser & builder) {
// Function-local statics: each regex is constructed once and reused across calls.
// function_regex has one capture group (the text before the separator tag);
// presumably this is the function name consumed by parse_json_tool_calls - verify there.
1483+ static const common_regex function_regex (" (?:<|tool▁call▁begin|>)?([^\\ n<]+)(?:<|tool▁sep|>)" );
1484+
1485+ static const common_regex close_regex (" (?:[\\ s]*)?<|tool▁call▁end|>" );
1486+ static const common_regex tool_calls_begin (" (?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\ _calls\\\\ _begin|>|<|tool▁calls|>)" );
1487+ static const common_regex tool_calls_end (" <|tool▁calls▁end|>" );
1488+
// Tool-call parsing disabled: pass the remaining input through as content and stop.
1489+ if (!builder.syntax ().parse_tool_calls ) {
1490+ LOG_DBG (" %s: not parse_tool_calls\n " , __func__);
1491+ builder.add_content (builder.consume_rest ());
1492+ return ;
1493+ }
1494+
1495+ LOG_DBG (" %s: parse_tool_calls\n " , __func__);
1496+
// No start-only function regex is supplied (std::nullopt); only function_regex is
// passed for locating each call.
1497+ parse_json_tool_calls (
1498+ builder,
1499+ /* block_open= */ tool_calls_begin,
1500+ /* function_regex_start_only= */ std::nullopt ,
1501+ function_regex,
1502+ close_regex,
1503+ tool_calls_end);
1504+ }
1505+
// Entry point for parsing a DeepSeek V3.1 response: splits reasoning from content and
// hands the content part to common_chat_parse_deepseek_v3_1_content().
//
// builder - message parser; this function reads syntax().thinking_forced_open,
//           syntax().reasoning_format and is_partial() from it.
//
// Branch order, as written below:
//   1. thinking forced open, input not partial, no closing think tag found
//      -> parse everything as content;
//   2. try_parse_reasoning() succeeds -> parse what follows as content;
//   3. otherwise: reasoning_format NONE -> content; thinking forced open -> the whole
//      rest becomes reasoning content; else -> content.
1506+ static void common_chat_parse_deepseek_v3_1 (common_chat_msg_parser & builder) {
1507+ // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
1508+ // First try to parse using the standard reasoning parsing method
1509+ LOG_DBG (" %s: thinking_forced_open: %s\n " , __func__, std::to_string (builder.syntax ().thinking_forced_open ).c_str ());
1510+
// Look ahead for the closing think tag, then move back to the saved position so
// this probe does not affect the parsing below.
1511+ auto start_pos = builder.pos ();
1512+ auto found_end_think = builder.try_find_literal (" </think>" );
1513+ builder.move_to (start_pos);
1514+
// Complete (non-partial) output with thinking forced open but no closing tag:
// the whole input goes to the content parser rather than being treated as reasoning.
1515+ if (builder.syntax ().thinking_forced_open && !builder.is_partial () && !found_end_think) {
1516+ LOG_DBG (" %s: no end_think, not partial, adding content\n " , __func__);
1517+ common_chat_parse_deepseek_v3_1_content (builder);
1518+ } else if (builder.try_parse_reasoning (" <think>" , " </think>" )) {
1519+ // If reasoning was parsed successfully, the remaining content is regular content
1520+ LOG_DBG (" %s: parsed reasoning, adding content\n " , __func__);
1521+ // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
1522+ common_chat_parse_deepseek_v3_1_content (builder);
1523+ } else {
// try_parse_reasoning() did not succeed; with reasoning_format NONE the input
// is handed to the content parser unchanged.
1524+ if (builder.syntax ().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
1525+ LOG_DBG (" %s: reasoning_format none, adding content\n " , __func__);
1526+ common_chat_parse_deepseek_v3_1_content (builder);
1527+ return ;
1528+ }
1529+ // If no reasoning tags found, check if we should treat everything as reasoning
1530+ if (builder.syntax ().thinking_forced_open ) {
1531+ // If thinking is forced open but no tags found, treat everything as reasoning
1532+ LOG_DBG (" %s: thinking_forced_open, adding reasoning content\n " , __func__);
1533+ builder.add_reasoning_content (builder.consume_rest ());
1534+ } else {
1535+ LOG_DBG (" %s: no thinking_forced_open, adding content\n " , __func__);
1536+ // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
1537+ common_chat_parse_deepseek_v3_1_content (builder);
1538+ }
1539+ }
1540+ }
1541+
14121542static common_chat_params common_chat_params_init_gpt_oss (const common_chat_template & tmpl, const struct templates_params & inputs) {
14131543 common_chat_params data;
14141544 auto prompt = apply (tmpl, inputs);
@@ -2365,6 +2495,12 @@ static common_chat_params common_chat_templates_apply_jinja(
23652495 }
23662496 }
23672497
2498+ // DeepSeek V3.1: detect based on specific patterns in the template
2499+ if (src.find (" message['prefix'] is defined and message['prefix'] and thinking" ) != std::string::npos &&
2500+ params.json_schema .is_null ()) {
2501+ return common_chat_params_init_deepseek_v3_1 (tmpl, params);
2502+ }
2503+
23682504 // DeepSeek R1: use handler in all cases except json schema (thinking / tools).
23692505 if (src.find (" <|tool▁calls▁begin|>" ) != std::string::npos && params.json_schema .is_null ()) {
23702506 return common_chat_params_init_deepseek_r1 (tmpl, params);
@@ -2537,6 +2673,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
25372673 case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
25382674 common_chat_parse_deepseek_r1 (builder);
25392675 break ;
2676+ case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
2677+ common_chat_parse_deepseek_v3_1 (builder);
2678+ break ;
25402679 case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
25412680 common_chat_parse_functionary_v3_2 (builder);
25422681 break ;
0 commit comments