Skip to content

Commit 9b47a58

Browse files
fix: add generic fallback to detect trailing <think> tags in Jinja templates and handle forced-open reasoning blocks
- Detect a trailing <think> tag in generic chat templates (ignoring trailing whitespace), and either append the closing </think> tag or mark the reasoning block as forced-open, depending on enable_thinking
- Add a regression test covering a fallback template that opens the reasoning block in the prompt; it verifies prompt differences, forced-open behaviour, and reasoning parsing
- Make the generic fallback compatible with models using the default Jinja chat template, such as https://huggingface.co/unsloth/GLM-Z1-32B-0414-GGUF
1 parent 3df2244 commit 9b47a58

File tree

2 files changed

+61
-0
lines changed

2 files changed

+61
-0
lines changed

common/chat.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <minja/chat-template.hpp>
1010
#include <minja/minja.hpp>
1111

12+
#include <cctype>
1213
#include <cstdio>
1314
#include <exception>
1415
#include <iostream>
@@ -2598,6 +2599,21 @@ static common_chat_params common_chat_params_init_without_tools(const common_cha
25982599
} else {
25992600
data.grammar = inputs.grammar;
26002601
}
2602+
2603+
static constexpr size_t think_tag_len = 7; // strlen("<think>")
2604+
size_t prompt_trimmed_size = data.prompt.size();
2605+
while (prompt_trimmed_size > 0 &&
2606+
std::isspace(static_cast<unsigned char>(data.prompt[prompt_trimmed_size - 1]))) {
2607+
--prompt_trimmed_size;
2608+
}
2609+
if (prompt_trimmed_size >= think_tag_len &&
2610+
data.prompt.compare(prompt_trimmed_size - think_tag_len, think_tag_len, "<think>") == 0) {
2611+
if (!inputs.enable_thinking) {
2612+
data.prompt += "</think>";
2613+
} else {
2614+
data.thinking_forced_open = true;
2615+
}
2616+
}
26012617
return data;
26022618
}
26032619

tests/test-chat.cpp

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,6 +1330,51 @@ static void test_template_output_parsers() {
13301330
// /* expect_grammar_triggered= */ true,
13311331
// /* test_grammar_if_triggered= */ false);
13321332
}
1333+
{
1334+
// Generic fallback template that appends <think> when add_generation_prompt is true.
1335+
static const char * tmpl_str = R"(
1336+
{% for message in messages %}
1337+
<|{{ message.role }}|>
1338+
{{ message.content }}
1339+
{% endfor %}
1340+
{% if add_generation_prompt %}<|assistant|>
1341+
<think>
1342+
{% endif %}
1343+
)";
1344+
1345+
auto tmpls = common_chat_templates_ptr(common_chat_templates_init(/* model= */ nullptr, tmpl_str));
1346+
1347+
common_chat_templates_inputs inputs_base;
1348+
inputs_base.messages = { message_user };
1349+
inputs_base.add_generation_prompt = true;
1350+
1351+
auto inputs_no_thinking = inputs_base;
1352+
inputs_no_thinking.enable_thinking = false;
1353+
auto params_no_thinking = common_chat_templates_apply(tmpls.get(), inputs_no_thinking);
1354+
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_no_thinking.format);
1355+
assert_equals(false, params_no_thinking.thinking_forced_open);
1356+
assert_equals(true, string_ends_with(params_no_thinking.prompt, "</think>"));
1357+
1358+
auto inputs_with_thinking = inputs_base;
1359+
inputs_with_thinking.enable_thinking = true;
1360+
auto params_with_thinking = common_chat_templates_apply(tmpls.get(), inputs_with_thinking);
1361+
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, params_with_thinking.format);
1362+
assert_equals(true, params_with_thinking.thinking_forced_open);
1363+
assert_equals(true, string_ends_with(string_strip(params_with_thinking.prompt), "<think>"));
1364+
1365+
assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
1366+
1367+
common_chat_syntax syntax;
1368+
syntax.format = params_with_thinking.format;
1369+
syntax.reasoning_format = COMMON_REASONING_FORMAT_AUTO;
1370+
syntax.thinking_forced_open = params_with_thinking.thinking_forced_open;
1371+
1372+
assert_msg_equals(simple_assist_msg("Final answer", "Reasoning trace"),
1373+
common_chat_parse(
1374+
"Reasoning trace</think>Final answer",
1375+
/* is_partial= */ false,
1376+
syntax));
1377+
}
13331378
{
13341379
// Replacement DeepSeek R1 template. Makes the Distill Qwen 7B/32B models happy to call tools and all.
13351380
auto tmpls = read_templates("models/templates/llama-cpp-deepseek-r1.jinja");

0 commit comments

Comments
 (0)