-
Notifications
You must be signed in to change notification settings - Fork 13.3k
Thinking model disabled assistant prefill #15404
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
f6648cf
8e4c1c1
c31c9bc
7cf5963
4c06dca
6adae51
e2a19d2
fe6b72b
f6c4aee
d7f2746
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2188,6 +2188,24 @@ struct server_context { | |
|
||
metrics.init(); | ||
|
||
// thinking is enabled if: | ||
// 1. It's not explicitly disabled (it is disabled when reasoning_budget == 0) | ||
// 2. The chat template supports it | ||
bool enable_thinking = params_base.reasoning_budget != 0; | ||
if (enable_thinking) { | ||
common_chat_templates_inputs dummy_inputs; | ||
common_chat_msg msg; | ||
msg.role = "user"; | ||
msg.content = "test"; | ||
dummy_inputs.messages = {msg}; | ||
dummy_inputs.enable_thinking = false; | ||
const auto rendered_no_thinking = common_chat_templates_apply(chat_templates.get(), dummy_inputs); | ||
dummy_inputs.enable_thinking = true; | ||
const auto rendered_with_thinking = common_chat_templates_apply(chat_templates.get(), dummy_inputs); | ||
enable_thinking = rendered_no_thinking.prompt != rendered_with_thinking.prompt; | ||
|
||
} | ||
SRV_INF("Enable thinking? %d\n", enable_thinking); | ||
|
||
oai_parser_opt = { | ||
/* use_jinja */ params_base.use_jinja, | ||
/* prefill_assistant */ params_base.prefill_assistant, | ||
|
@@ -2196,7 +2214,7 @@ struct server_context { | |
/* common_chat_templates */ chat_templates.get(), | ||
/* allow_image */ mctx ? mtmd_support_vision(mctx) : false, | ||
/* allow_audio */ mctx ? mtmd_support_audio (mctx) : false, | ||
/* enable_thinking */ params_base.reasoning_budget != 0, | ||
/* enable_thinking */ enable_thinking, | ||
}; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This logic could move into a free function in `chat.*` (something like `common_chat_supports_enable_thinking`).

Also, an alternate implementation of this would be to be more explicit and do some kind of a `switch` on the subtype of `common_chat_params`, but that felt hard to maintain (similar to other places where an arch enum is required). Since this is a boot-time operation, it feels OK to do the extra template expansion to avoid another place where a developer would need to poke in architecture-specific logic.