@@ -149,6 +149,7 @@ struct templates_params {
    bool add_bos;
    bool add_eos;
    bool is_inference = true;
+    bool supports_enable_thinking = false;
};

common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -171,10 +172,8 @@ bool common_chat_templates_support_enable_thinking(const common_chat_templates *
    msg.content = "test";
    dummy_inputs.messages = {msg};
    dummy_inputs.enable_thinking = false;
-    const auto rendered_no_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    dummy_inputs.enable_thinking = true;
-    const auto rendered_with_thinking = common_chat_templates_apply(chat_templates, dummy_inputs);
-    return rendered_no_thinking.prompt != rendered_with_thinking.prompt;
+    const auto rendered = common_chat_templates_apply(chat_templates, dummy_inputs);
+    return rendered.supports_enable_thinking;
}

template <>
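
A note on the hunk above: support detection moves out of this public helper and into the apply path (see the final hunk of this commit), so the helper now renders the dummy conversation once and simply returns the flag computed there. Illustrative usage, not code from this patch:

    // Hypothetical caller: skip reasoning-specific handling when the
    // loaded template ignores enable_thinking entirely.
    if (!common_chat_templates_support_enable_thinking(chat_templates)) {
        // treat the request as plain, non-thinking chat
    }
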
@@ -827,6 +826,7 @@ static std::string apply(

static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    auto tool_call_schemas = json::array();
    foreach_function(inputs.tools, [&](const json & tool) {
@@ -944,6 +944,7 @@ static void common_chat_parse_generic(common_chat_msg_parser & builder) {

static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    data.grammar = build_grammar([&](const common_grammar_builder & builder) {
        auto schemas = json::array();
@@ -989,6 +990,7 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat

static common_chat_params common_chat_params_init_magistral(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_MAGISTRAL;
    data.preserved_tokens = {
@@ -1069,6 +1071,7 @@ static void common_chat_parse_magistral(common_chat_msg_parser & builder) {

static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    auto adjusted_messages = json::array();
    for (const auto & msg : inputs.messages) {
@@ -1202,6 +1205,7 @@ static void expect_tool_parameters(const std::string & name, const json & parame
static common_chat_params common_chat_params_init_llama_3_x(const common_chat_template & tmpl, const struct templates_params & inputs, bool allow_python_tag_builtin_tools) {
    auto builtin_tools = json::array();
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    if (!inputs.tools.is_null()) {
        data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -1281,6 +1285,7 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te

static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
@@ -1342,6 +1347,7 @@ static common_chat_params common_chat_params_init_nemotron_v2(const common_chat_

static common_chat_params common_chat_params_init_apertus(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    // Generate the prompt using the apply() function with the template
    data.prompt = apply(tmpl, inputs);
@@ -1466,6 +1472,7 @@ static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool w

static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    auto prompt = apply(tmpl, inputs);

    // Hacks to fix the official (broken) prompt.
@@ -1540,6 +1547,7 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_

static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    // Pass thinking context for DeepSeek V3.1 template
    json additional_context = {
@@ -1685,6 +1693,7 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {

static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    auto prompt = apply(tmpl, inputs);

    // Check if we need to replace the return token with end token during
@@ -1904,6 +1913,7 @@ static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
    LOG_DBG("%s\n", __func__);
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    const std::optional<json> tools_override = json();
    const std::optional<json> additional_context = json {
        {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")},
@@ -1962,6 +1972,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
    // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
    // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code.
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
    if (inputs.tools.is_array() && !inputs.tools.empty()) {
@@ -2038,6 +2049,7 @@ static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder)
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
    // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    if (!inputs.tools.is_null()) {
        std::string python_code_argument_name;
@@ -2121,6 +2133,7 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser

static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    json extra_context = json {
        {"enable_thinking", inputs.enable_thinking},
@@ -2314,6 +2327,7 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {

static common_chat_params common_chat_params_init_granite(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;

    // Pass thinking context for Granite template
    json additional_context = {
@@ -2588,6 +2602,7 @@ static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {

static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
    common_chat_params data;
+    data.supports_enable_thinking = inputs.supports_enable_thinking;
    data.prompt = apply(tmpl, inputs);
    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    data.grammar_lazy = false;
@@ -2600,18 +2615,20 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
        data.grammar = inputs.grammar;
    }

-    static constexpr size_t think_tag_len = 7; // strlen("<think>")
-    size_t prompt_trimmed_size = data.prompt.size();
-    while (prompt_trimmed_size > 0 &&
-           std::isspace(static_cast<unsigned char>(data.prompt[prompt_trimmed_size - 1]))) {
-        --prompt_trimmed_size;
-    }
-    if (prompt_trimmed_size >= think_tag_len &&
-        data.prompt.compare(prompt_trimmed_size - think_tag_len, think_tag_len, "<think>") == 0) {
-        if (!inputs.enable_thinking) {
-            data.prompt += "</think>";
-        } else {
-            data.thinking_forced_open = true;
+    if (inputs.supports_enable_thinking) {
+        static constexpr size_t think_tag_len = 7; // strlen("<think>")
+        size_t prompt_trimmed_size = data.prompt.size();
+        while (prompt_trimmed_size > 0 &&
+               std::isspace(static_cast<unsigned char>(data.prompt[prompt_trimmed_size - 1]))) {
+            --prompt_trimmed_size;
+        }
+        if (prompt_trimmed_size >= think_tag_len &&
+            data.prompt.compare(prompt_trimmed_size - think_tag_len, think_tag_len, "<think>") == 0) {
+            if (!inputs.enable_thinking) {
+                data.prompt += "</think>";
+            } else {
+                data.thinking_forced_open = true;
+            }
        }
    }
    return data;
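
The gist of the newly guarded block: when (and only when) the template is known to react to `enable_thinking`, a prompt whose right-trimmed tail is `<think>` either gets force-closed with `</think>` (thinking disabled) or is flagged as `thinking_forced_open`. The tail check in isolation, as a self-contained sketch (the real code operates on `data.prompt` in place):

    #include <cctype>
    #include <cstddef>
    #include <string>

    // Does the prompt end with "<think>" once trailing whitespace is ignored?
    // Mirrors the check above: right-trim, then compare the last 7 characters.
    static bool ends_with_open_think_tag(const std::string & prompt) {
        static constexpr size_t think_tag_len = 7; // strlen("<think>")
        size_t n = prompt.size();
        while (n > 0 && std::isspace(static_cast<unsigned char>(prompt[n - 1]))) {
            --n;
        }
        return n >= think_tag_len &&
               prompt.compare(n - think_tag_len, think_tag_len, "<think>") == 0;
    }
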
@@ -2623,6 +2640,7 @@ static common_chat_params common_chat_params_init_seed_oss(
    const common_chat_templates_inputs & inputs)
{
    common_chat_params data;
+    data.supports_enable_thinking = params.supports_enable_thinking;
    data.prompt = apply(tmpl, params);
    data.format = COMMON_CHAT_FORMAT_SEED_OSS;
    if (string_ends_with(data.prompt, "<seed:think>")) {
@@ -2696,6 +2714,15 @@ static common_chat_params common_chat_templates_apply_jinja(
        params.extra_context[el.first] = json::parse(el.second);
    }

+    {
+        auto params_with_thinking = params;
+        params_with_thinking.enable_thinking = true;
+        auto params_without_thinking = params;
+        params_without_thinking.enable_thinking = false;
+        params.supports_enable_thinking =
+            apply(tmpl, params_with_thinking) != apply(tmpl, params_without_thinking);
+    }
+
    if (!inputs.json_schema.empty()) {
        params.json_schema = json::parse(inputs.json_schema);
    }
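
This block is where `supports_enable_thinking` is actually computed: the template is rendered twice with only `enable_thinking` toggled, and any difference in output means the Jinja template consumes the flag. The same probe in isolation (a sketch; `render` is a hypothetical stand-in for the real `apply(tmpl, params)` call):

    #include <functional>
    #include <string>

    // A template "supports" enable_thinking iff toggling the flag
    // changes the rendered prompt.
    static bool probe_enable_thinking(
            const std::function<std::string(bool /*enable_thinking*/)> & render) {
        return render(/*enable_thinking=*/true) != render(/*enable_thinking=*/false);
    }

Note that, as written, every call to common_chat_templates_apply_jinja() performs these two extra renders; the result is not cached per template.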