@@ -133,6 +133,7 @@ struct templates_params {
133133 bool stream;
134134 std::string grammar;
135135 bool add_generation_prompt = true ;
136+ bool enable_thinking = true ;
136137 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
137138};
138139
@@ -573,7 +574,7 @@ common_chat_templates_ptr common_chat_templates_init(
573574 return tmpls;
574575}
575576
576- std::string common_chat_format_name (common_chat_format format) {
577+ const char * common_chat_format_name (common_chat_format format) {
577578 switch (format) {
578579 case COMMON_CHAT_FORMAT_CONTENT_ONLY: return " Content-only" ;
579580 case COMMON_CHAT_FORMAT_GENERIC: return " Generic" ;
@@ -591,6 +592,15 @@ std::string common_chat_format_name(common_chat_format format) {
591592 }
592593}
593594
// Maps a common_reasoning_format value to a name returned as a string literal.
// The switch handles COMMON_REASONING_FORMAT_NONE and
// COMMON_REASONING_FORMAT_DEEPSEEK; any other value throws std::runtime_error.
// NOTE(review): this text is a diff rendering, so the whitespace inside the
// quoted literals may be an extraction artifact -- confirm against the real file.
595+ const char * common_reasoning_format_name (common_reasoning_format format) {
596+ switch (format) {
597+ case COMMON_REASONING_FORMAT_NONE: return " none" ;
598+ case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
599+ default :
600+ throw std::runtime_error (" Unknown reasoning format" );
601+ }
602+ }
603+
594604static std::string wrap_code_as_arguments (common_chat_msg_parser & builder, const std::string & code) {
595605 std::string arguments;
596606 if (builder.is_partial ()) {
@@ -924,7 +934,13 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
924934 data.prompt = apply (tmpl, adjusted_messages, inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , {});
925935 data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
926936 if (string_ends_with (data.prompt , " <|START_THINKING|>" )) {
927- data.thinking_forced_open = true ;
937+ if (!inputs.enable_thinking ) {
938+ data.prompt += " <|END_THINKING|>" ;
939+ } else {
940+ data.thinking_forced_open = true ;
941+ }
942+ } else if (!inputs.enable_thinking && string_ends_with (data.prompt , " <|CHATBOT_TOKEN|>" )) {
943+ data.prompt += " <|START_THINKING|><|END_THINKING|>" ;
928944 }
929945
930946 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1192,7 +1208,11 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
11921208 data.prompt = prompt;
11931209 data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
11941210 if (string_ends_with (data.prompt , " <think>\n " )) {
1195- data.thinking_forced_open = true ;
1211+ if (!inputs.enable_thinking ) {
1212+ data.prompt += " </think>" ;
1213+ } else {
1214+ data.thinking_forced_open = true ;
1215+ }
11961216 }
11971217
11981218 if (inputs.tools .is_array () && !inputs.tools .empty ()) {
@@ -1477,104 +1497,114 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser
// Builds the common_chat_params for the Hermes 2 Pro chat format.
//
// Reading this hunk: lines marked '-' are the previous version of the body and
// lines marked '+' are the replacement; unmarked lines are unchanged context.
//
// New ('+') behavior, as shown below:
//   * The prompt is rendered with apply(), passing an extra context object
//     whose "enable_thinking" entry mirrors inputs.enable_thinking.
//   * If the rendered prompt ends with the opening think tag plus newline:
//     when thinking is disabled the closing think tag is appended to the
//     prompt, otherwise data.thinking_forced_open is set.
//   * The grammar, its triggers and the preserved tokens are only set up when
//     inputs.tools is not null; the previous version set them up unconditionally.
//   * The grammar is lazy unless tool_choice is COMMON_CHAT_TOOL_CHOICE_REQUIRED.
//   * The root rule accepts an optional leading closing think tag when
//     thinking_forced_open is set, followed by one tool call (or one-or-more
//     when inputs.parallel_tool_calls is set).
// Returns the populated `data` by value.
14771497static common_chat_params common_chat_params_init_hermes_2_pro (const common_chat_template & tmpl, const struct templates_params & inputs) {
14781498 common_chat_params data;
14791499
1480- data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt );
1500+ json additional_context = {
1501+ {" enable_thinking" , inputs.enable_thinking },
1502+ };
1503+
1504+ data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , additional_context);
14811505 data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
// The template left a think block open at the end of the prompt: either close
// it right away (thinking disabled) or record that the output starts inside it.
14821506 if (string_ends_with (data.prompt , " <think>\n " )) {
1483- data.thinking_forced_open = true ;
1507+ if (!inputs.enable_thinking ) {
1508+ data.prompt += " </think>" ;
1509+ } else {
1510+ data.thinking_forced_open = true ;
1511+ }
14841512 }
14851513
1486- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1487- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1488- data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1489- std::vector<std::string> tool_rules;
1490- std::vector<std::string> tool_call_alts;
1491- std::vector<std::string> escaped_names;
1492- foreach_function (inputs.tools , [&](const json & tool) {
1493- const auto & function = tool.at (" function" );
1494- std::string name = function.at (" name" );
1495- auto parameters = function.at (" parameters" );
1496- builder.resolve_refs (parameters);
1497- tool_rules.push_back (builder.add_schema (name + " -call" , {
1498- {" type" , " object" },
1499- {" properties" , json {
1500- {" name" , json {{" const" , name}}},
1501- {" arguments" , parameters},
1502- }},
1503- {" required" , json::array ({" name" , " arguments" })},
1504- }));
1505- tool_call_alts.push_back (builder.add_rule (
1506- name + " -function-tag" ,
1507- " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1508- builder.add_schema (name + " -args" , parameters) + " "
1509- " \" </function>\" space" ));
// New version: everything grammar-related is skipped when tools is null.
1514+ if (!inputs.tools .is_null ()) {
1515+ // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1516+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1517+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1518+ std::vector<std::string> tool_rules;
1519+ std::vector<std::string> tool_call_alts;
1520+ std::vector<std::string> escaped_names;
1521+ foreach_function (inputs.tools , [&](const json & tool) {
1522+ const auto & function = tool.at (" function" );
1523+ std::string name = function.at (" name" );
1524+ auto parameters = function.at (" parameters" );
1525+ builder.resolve_refs (parameters);
1526+ tool_rules.push_back (builder.add_schema (name + " -call" , {
1527+ {" type" , " object" },
1528+ {" properties" , json {
1529+ {" name" , json {{" const" , name}}},
1530+ {" arguments" , parameters},
1531+ }},
1532+ {" required" , json::array ({" name" , " arguments" })},
1533+ }));
1534+ tool_call_alts.push_back (builder.add_rule (
1535+ name + " -function-tag" ,
1536+ " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1537+ builder.add_schema (name + " -args" , parameters) + " "
1538+ " \" </function>\" space" ));
15101539
1511- data.grammar_triggers .push_back ({
1512- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1513- " <function=" + name + " >" ,
1540+ data.grammar_triggers .push_back ({
1541+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1542+ " <function=" + name + " >" ,
1543+ });
1544+ auto escaped_name = regex_escape (name);
1545+ data.grammar_triggers .push_back ({
1546+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1547+ " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1548+ });
1549+ escaped_names.push_back (escaped_name);
15141550 });
1515- auto escaped_name = regex_escape (name);
1551+ auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1552+ std::vector<std::string> alt_tags {
1553+ any_tool_call,
1554+ " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1555+ // The rest is just to accommodate common "good bad" outputs.
1556+ " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1557+ " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1558+ " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1559+ " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1560+ " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1561+ " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1562+ };
1563+ auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1564+ tool_call_alts.push_back (wrappable_tool_call);
1565+ tool_call_alts.push_back (
1566+ " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1567+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1568+ builder.add_rule (" root" ,
1569+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1570+ (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1571+ // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
15161572 data.grammar_triggers .push_back ({
1517- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1518- " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1573+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1574+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1575+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1576+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1577+ " (\\ s*"
1578+ " (?:<tool_call>"
1579+ " |<function"
1580+ " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1581+ " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1582+ " )"
1583+ " )[\\ s\\ S]*"
1584+ ),
15191585 });
1520- escaped_names.push_back (escaped_name);
1521- });
1522- auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1523- std::vector<std::string> alt_tags {
1524- any_tool_call,
1525- " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1526- // The rest is just to accommodate common "good bad" outputs.
1527- " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1528- " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1529- " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1530- " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1531- " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1532- " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1533- };
1534- auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1535- tool_call_alts.push_back (wrappable_tool_call);
1536- tool_call_alts.push_back (
1537- " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1538- auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1539- builder.add_rule (" root" ,
1540- std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1541- (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1542- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1543- data.grammar_triggers .push_back ({
1544- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1545- // If thinking_forced_open, then we capture the </think> tag in the grammar,
1546- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1547- std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1548- " (\\ s*"
1549- " (?:<tool_call>"
1550- " |<function"
1551- " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1552- " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1553- " )"
1554- " )[\\ s\\ S]*"
1555- ),
1586+ data.preserved_tokens = {
1587+ " <think>" ,
1588+ " </think>" ,
1589+ " <tool_call>" ,
1590+ " </tool_call>" ,
1591+ " <function" ,
1592+ " <tools>" ,
1593+ " </tools>" ,
1594+ " <response>" ,
1595+ " </response>" ,
1596+ " <function_call>" ,
1597+ " </function_call>" ,
1598+ " <json>" ,
1599+ " </json>" ,
1600+ " <JSON>" ,
1601+ " </JSON>" ,
1602+ " ```" ,
1603+ " ```json" ,
1604+ " ```xml" ,
1605+ };
15561606 });
1557- data.preserved_tokens = {
1558- " <think>" ,
1559- " </think>" ,
1560- " <tool_call>" ,
1561- " </tool_call>" ,
1562- " <function" ,
1563- " <tools>" ,
1564- " </tools>" ,
1565- " <response>" ,
1566- " </response>" ,
1567- " <function_call>" ,
1568- " </function_call>" ,
1569- " <json>" ,
1570- " </json>" ,
1571- " <JSON>" ,
1572- " </JSON>" ,
1573- " ```" ,
1574- " ```json" ,
1575- " ```xml" ,
1576- };
1577- });
1607+ }
15781608
15791609 return data;
15801610}
@@ -1688,6 +1718,7 @@ static common_chat_params common_chat_templates_apply_jinja(
16881718 params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages , /* concat_text= */ !tmpl.original_caps ().requires_typed_content );
16891719 params.add_generation_prompt = inputs.add_generation_prompt ;
16901720 params.tool_choice = inputs.tool_choice ;
1721+ params.enable_thinking = inputs.enable_thinking ;
16911722 params.grammar = inputs.grammar ;
16921723 params.now = inputs.now ;
16931724 if (!inputs.json_schema .empty ()) {
@@ -1721,7 +1752,7 @@ static common_chat_params common_chat_templates_apply_jinja(
17211752 }
17221753
17231754 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1724- if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null () && params. tools . is_array () && params. json_schema . is_null () ) {
1755+ if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
17251756 return common_chat_params_init_hermes_2_pro (tmpl, params);
17261757 }
17271758
@@ -1840,7 +1871,7 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
18401871}
18411872
18421873static void common_chat_parse (common_chat_msg_parser & builder) {
1843- LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (builder.syntax ().format ). c_str () , builder.input ().c_str ());
1874+ LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (builder.syntax ().format ), builder.input ().c_str ());
18441875
18451876 switch (builder.syntax ().format ) {
18461877 case COMMON_CHAT_FORMAT_CONTENT_ONLY:
0 commit comments