@@ -133,6 +133,7 @@ struct templates_params {
133133 bool stream;
134134 std::string grammar;
135135 bool add_generation_prompt = true ;
136+ bool enable_thinking = true ;
136137 std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
137138};
138139
@@ -573,7 +574,7 @@ common_chat_templates_ptr common_chat_templates_init(
573574 return tmpls;
574575}
575576
576- std::string common_chat_format_name (common_chat_format format) {
577+ const char * common_chat_format_name (common_chat_format format) {
577578 switch (format) {
578579 case COMMON_CHAT_FORMAT_CONTENT_ONLY: return " Content-only" ;
579580 case COMMON_CHAT_FORMAT_GENERIC: return " Generic" ;
@@ -591,6 +592,15 @@ std::string common_chat_format_name(common_chat_format format) {
591592 }
592593}
593594
595+ const char * common_reasoning_format_name (common_reasoning_format format) {
596+ switch (format) {
597+ case COMMON_REASONING_FORMAT_NONE: return " none" ;
598+ case COMMON_REASONING_FORMAT_DEEPSEEK: return " deepseek" ;
599+ default :
600+ throw std::runtime_error (" Unknown reasoning format" );
601+ }
602+ }
603+
594604static std::string wrap_code_as_arguments (common_chat_msg_parser & builder, const std::string & code) {
595605 std::string arguments;
596606 if (builder.is_partial ()) {
@@ -918,7 +928,13 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
918928 data.prompt = apply (tmpl, adjusted_messages, inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , {});
919929 data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
920930 if (string_ends_with (data.prompt , " <|START_THINKING|>" )) {
921- data.thinking_forced_open = true ;
931+ if (!inputs.enable_thinking ) {
932+ data.prompt += " <|END_THINKING|>" ;
933+ } else {
934+ data.thinking_forced_open = true ;
935+ }
936+ } else if (!inputs.enable_thinking && string_ends_with (data.prompt , " <|CHATBOT_TOKEN|>" )) {
937+ data.prompt += " <|START_THINKING|><|END_THINKING|>" ;
922938 }
923939
924940 data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
@@ -1186,7 +1202,11 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
11861202 data.prompt = prompt;
11871203 data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
11881204 if (string_ends_with (data.prompt , " <think>\n " )) {
1189- data.thinking_forced_open = true ;
1205+ if (!inputs.enable_thinking ) {
1206+ data.prompt += " </think>" ;
1207+ } else {
1208+ data.thinking_forced_open = true ;
1209+ }
11901210 }
11911211
11921212 if (inputs.tools .is_array () && !inputs.tools .empty ()) {
@@ -1460,104 +1480,114 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser
// Builds the chat parameters for the Hermes 2 Pro format: renders the prompt through the
// template, tags the result with COMMON_CHAT_FORMAT_HERMES_2_PRO and sets up the tool-call
// grammar, its triggers and the preserved tokens.
// NOTE(review): this span is a diff rendering. Lines tagged '-' are the removed version and
// lines tagged '+' are their replacement, so the grammar section appears twice (old, unguarded
// form and new form nested inside the `!inputs.tools.is_null()` check).
14601480static common_chat_params common_chat_params_init_hermes_2_pro (const common_chat_template & tmpl, const struct templates_params & inputs) {
14611481 common_chat_params data;
14621482
1463- data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt );
// The added version forwards enable_thinking to apply() as an extra context object.
// NOTE(review): presumably the chat template reads this key — confirm against the template.
1483+ json additional_context = {
1484+ {" enable_thinking" , inputs.enable_thinking },
1485+ };
1486+
1487+ data.prompt = apply (tmpl, inputs.messages , inputs.tools .empty () ? json () : inputs.tools , inputs.add_generation_prompt , additional_context);
14641488 data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
// The rendered prompt ends with an opening think tag: when thinking is disabled the added
// version closes the tag in the prompt itself, otherwise it records that the block was left open.
14651489 if (string_ends_with (data.prompt , " <think>\n " )) {
1466- data.thinking_forced_open = true ;
1490+ if (!inputs.enable_thinking ) {
1491+ data.prompt += " </think>" ;
1492+ } else {
1493+ data.thinking_forced_open = true ;
1494+ }
14671495 }
14681496
1469- // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1470- data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1471- data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1472- std::vector<std::string> tool_rules;
1473- std::vector<std::string> tool_call_alts;
1474- std::vector<std::string> escaped_names;
1475- foreach_function (inputs.tools , [&](const json & tool) {
1476- const auto & function = tool.at (" function" );
1477- std::string name = function.at (" name" );
1478- auto parameters = function.at (" parameters" );
1479- builder.resolve_refs (parameters);
1480- tool_rules.push_back (builder.add_schema (name + " -call" , {
1481- {" type" , " object" },
1482- {" properties" , json {
1483- {" name" , json {{" const" , name}}},
1484- {" arguments" , parameters},
1485- }},
1486- {" required" , json::array ({" name" , " arguments" })},
1487- }));
1488- tool_call_alts.push_back (builder.add_rule (
1489- name + " -function-tag" ,
1490- " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1491- builder.add_schema (name + " -args" , parameters) + " "
1492- " \" </function>\" space" ));
// From here on the added version only builds the grammar, triggers and preserved tokens
// when a tools value was supplied (inputs.tools is not null).
1497+ if (!inputs.tools .is_null ()) {
1498+ // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1499+ data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
1500+ data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
1501+ std::vector<std::string> tool_rules;
1502+ std::vector<std::string> tool_call_alts;
1503+ std::vector<std::string> escaped_names;
1504+ foreach_function (inputs.tools , [&](const json & tool) {
1505+ const auto & function = tool.at (" function" );
1506+ std::string name = function.at (" name" );
1507+ auto parameters = function.at (" parameters" );
1508+ builder.resolve_refs (parameters);
1509+ tool_rules.push_back (builder.add_schema (name + " -call" , {
1510+ {" type" , " object" },
1511+ {" properties" , json {
1512+ {" name" , json {{" const" , name}}},
1513+ {" arguments" , parameters},
1514+ }},
1515+ {" required" , json::array ({" name" , " arguments" })},
1516+ }));
1517+ tool_call_alts.push_back (builder.add_rule (
1518+ name + " -function-tag" ,
1519+ " \" <function\" ( \" =" + name + " \" | \" name=\\\" " + name + " \\\"\" ) \" >\" space " +
1520+ builder.add_schema (name + " -args" , parameters) + " "
1521+ " \" </function>\" space" ));
14931522
1494- data.grammar_triggers .push_back ({
1495- COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1496- " <function=" + name + " >" ,
1523+ data.grammar_triggers .push_back ({
1524+ COMMON_GRAMMAR_TRIGGER_TYPE_WORD,
1525+ " <function=" + name + " >" ,
1526+ });
1527+ auto escaped_name = regex_escape (name);
1528+ data.grammar_triggers .push_back ({
1529+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1530+ " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1531+ });
1532+ escaped_names.push_back (escaped_name);
14971533 });
1498- auto escaped_name = regex_escape (name);
1534+ auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1535+ std::vector<std::string> alt_tags {
1536+ any_tool_call,
1537+ " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1538+ // The rest is just to accommodate common "good bad" outputs.
1539+ " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1540+ " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1541+ " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1542+ " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1543+ " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1544+ " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1545+ };
1546+ auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1547+ tool_call_alts.push_back (wrappable_tool_call);
1548+ tool_call_alts.push_back (
1549+ " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1550+ auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1551+ builder.add_rule (" root" ,
1552+ std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1553+ (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1554+ // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
14991555 data.grammar_triggers .push_back ({
1500- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
1501- " <function\\ s+name\\ s*=\\ s*\" " + escaped_name + " \" " ,
1556+ COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1557+ // If thinking_forced_open, then we capture the </think> tag in the grammar,
1558+ // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1559+ std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1560+ " (\\ s*"
1561+ " (?:<tool_call>"
1562+ " |<function"
1563+ " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1564+ " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1565+ " )"
1566+ " )[\\ s\\ S]*"
1567+ ),
15021568 });
1503- escaped_names.push_back (escaped_name);
1504- });
1505- auto any_tool_call = builder.add_rule (" any_tool_call" , " ( " + string_join (tool_rules, " | " ) + " ) space" );
1506- std::vector<std::string> alt_tags {
1507- any_tool_call,
1508- " \" <tool_call>\" space " + any_tool_call + " \" </tool_call>\" " ,
1509- // The rest is just to accommodate common "good bad" outputs.
1510- " \" <function_call>\" space " + any_tool_call + " \" </function_call>\" " ,
1511- " \" <response>\" space " + any_tool_call + " \" </response>\" " ,
1512- " \" <tools>\" space " + any_tool_call + " \" </tools>\" " ,
1513- " \" <json>\" space " + any_tool_call + " \" </json>\" " ,
1514- " \" <xml>\" space " + any_tool_call + " \" </xml>\" " ,
1515- " \" <JSON>\" space " + any_tool_call + " \" </JSON>\" " ,
1516- };
1517- auto wrappable_tool_call = builder.add_rule (" wrappable_tool_call" , " ( " + string_join (alt_tags, " | " ) + " ) space" );
1518- tool_call_alts.push_back (wrappable_tool_call);
1519- tool_call_alts.push_back (
1520- " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
1521- auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1522- builder.add_rule (" root" ,
1523- std::string (data.thinking_forced_open ? " ( \" </think>\" space )? " : " " ) +
1524- (inputs.parallel_tool_calls ? " (" + tool_call + " )+" : tool_call));
1525- // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
1526- data.grammar_triggers .push_back ({
1527- COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
1528- // If thinking_forced_open, then we capture the </think> tag in the grammar,
1529- // (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
1530- std::string (data.thinking_forced_open ? " [\\ s\\ S]*?(</think>\\ s*)" : " (?:<think>[\\ s\\ S]*?</think>\\ s*)?" ) + (
1531- " (\\ s*"
1532- " (?:<tool_call>"
1533- " |<function"
1534- " |(?:```(?:json|xml)?\n\\ s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?"
1535- " \\ s*\\ {\\ s*\" name\"\\ s*:\\ s*\" (?:" + string_join (escaped_names, " |" ) + " )\" "
1536- " )"
1537- " )[\\ s\\ S]*"
1538- ),
1569+ data.preserved_tokens = {
1570+ " <think>" ,
1571+ " </think>" ,
1572+ " <tool_call>" ,
1573+ " </tool_call>" ,
1574+ " <function" ,
1575+ " <tools>" ,
1576+ " </tools>" ,
1577+ " <response>" ,
1578+ " </response>" ,
1579+ " <function_call>" ,
1580+ " </function_call>" ,
1581+ " <json>" ,
1582+ " </json>" ,
1583+ " <JSON>" ,
1584+ " </JSON>" ,
1585+ " ```" ,
1586+ " ```json" ,
1587+ " ```xml" ,
1588+ };
15391589 });
1540- data.preserved_tokens = {
1541- " <think>" ,
1542- " </think>" ,
1543- " <tool_call>" ,
1544- " </tool_call>" ,
1545- " <function" ,
1546- " <tools>" ,
1547- " </tools>" ,
1548- " <response>" ,
1549- " </response>" ,
1550- " <function_call>" ,
1551- " </function_call>" ,
1552- " <json>" ,
1553- " </json>" ,
1554- " <JSON>" ,
1555- " </JSON>" ,
1556- " ```" ,
1557- " ```json" ,
1558- " ```xml" ,
1559- };
1560- });
1590+ }
15611591
15621592 return data;
15631593}
@@ -1669,6 +1699,7 @@ static common_chat_params common_chat_templates_apply_jinja(
16691699 params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages , /* concat_text= */ !tmpl.original_caps ().requires_typed_content );
16701700 params.add_generation_prompt = inputs.add_generation_prompt ;
16711701 params.tool_choice = inputs.tool_choice ;
1702+ params.enable_thinking = inputs.enable_thinking ;
16721703 params.grammar = inputs.grammar ;
16731704 params.now = inputs.now ;
16741705 if (!inputs.json_schema .empty ()) {
@@ -1702,7 +1733,7 @@ static common_chat_params common_chat_templates_apply_jinja(
17021733 }
17031734
17041735 // Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
1705- if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null () && params. tools . is_array () && params. json_schema . is_null () ) {
1736+ if (src.find (" <tool_call>" ) != std::string::npos && params.json_schema .is_null ()) {
17061737 return common_chat_params_init_hermes_2_pro (tmpl, params);
17071738 }
17081739
@@ -1821,7 +1852,7 @@ static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
18211852}
18221853
18231854static void common_chat_parse (common_chat_msg_parser & builder, common_chat_format format) {
1824- LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (format). c_str () , builder.input ().c_str ());
1855+ LOG_DBG (" Parsing input with format %s: %s\n " , common_chat_format_name (format), builder.input ().c_str ());
18251856
18261857 switch (format) {
18271858 case COMMON_CHAT_FORMAT_CONTENT_ONLY:
@@ -1858,7 +1889,7 @@ static void common_chat_parse(common_chat_msg_parser & builder, common_chat_form
18581889 common_chat_parse_command_r7b (builder);
18591890 break ;
18601891 default :
1861- throw std::runtime_error (" Unsupported format: " + common_chat_format_name (format));
1892+ throw std::runtime_error (std::string ( " Unsupported format: " ) + common_chat_format_name (format));
18621893 }
18631894 builder.finish ();
18641895}
0 commit comments