@@ -1767,9 +1767,11 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
17671767 }
17681768
17691769 if (!inputs.tools .is_null ()) {
1770- auto supports_thinking = tmpl.source ().find (" <think>" ) != std::string::npos && data.thinking_forced_open == false ;
1770+ auto supports_thinking = tmpl.source ().find (" <think>" ) != std::string::npos;
1771+ // enable_thinking must not be set if <think> is not supported by the template
1772+ GGML_ASSERT (!extra_context[" enable_thinking" ] || extra_context[" enable_thinking" ] == supports_thinking);
17711773 // (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
1772- data.grammar_lazy = inputs. tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED || supports_thinking ;
1774+ data.grammar_lazy = true ;
17731775 data.grammar = build_grammar ([&](const common_grammar_builder & builder) {
17741776 std::vector<std::string> tool_rules;
17751777 std::vector<std::string> tool_call_alts;
@@ -1821,13 +1823,27 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
18211823 tool_call_alts.push_back (
18221824 " ( \" ```\\ n\" | \" ```json\\ n\" | \" ```xml\\ n\" ) space " + wrappable_tool_call + " space \" ```\" space " );
18231825 auto tool_call = builder.add_rule (" tool_call" , string_join (tool_call_alts, " | " ));
1824- if (supports_thinking) {
1825- builder.add_rule (" thinking" , " \" <think>\" [^\\ x00]* \" </think>\" space" );
1826+
1827+ builder.add_rule (" thinking_start" , " \" <think>\" " );
1828+ builder.add_rule (" thinking_content" , " [^\\ x00]*" );
1829+ builder.add_rule (" thinking_end" , " \" </think>\" space" );
1830+
1831+ // thinking grammar logic depends on whether thinking_forced_open was set to true (so already opened (and maybe closed)) and on whether thinking is even allowed
1832+ std::string thinking_grammar_logic = " " ; // thinking tag was closed or not supported/wanted
1833+ if (extra_context[" enable_thinking" ]) {
1834+ if (data.thinking_forced_open ) {
1835+ // thinking tag was already opened by the user so we don't need to add it again
1836+ thinking_grammar_logic = " thinking_content thinking_end " ;
1837+ }
1838+ else
1839+ {
1840+ thinking_grammar_logic = " thinking_start thinking_content thinking_end " ;
1841+ }
18261842 }
1827- builder. add_rule ( " root " ,
1828- std::string (supports_thinking ? " (thinking)? space " :
1829- data. thinking_forced_open ? " ( \" </think> \" space )? " : " " ) +
1830- (inputs. parallel_tool_calls ? " ( " + tool_call + " )+ " : tool_call));
1843+
1844+
1845+ builder. add_rule ( " root " , thinking_grammar_logic + (inputs. parallel_tool_calls ? " (" + tool_call + " )+ " : tool_call));
1846+
18311847 // Trigger on some common known "good bad" outputs (only from the start and with a json that's about a specific argument name to avoid false positives)
18321848 data.grammar_triggers .push_back ({
18331849 COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
0 commit comments