Skip to content

Commit cdea6a9

Browse files
author
ochafik
committed
update --reasoning-format={disabled -> nothink} as suggested
1 parent b6eb0a5 commit cdea6a9

File tree

6 files changed

+22
-21
lines changed

6 files changed

+22
-21
lines changed

common/arg.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -2848,12 +2848,12 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
28482848
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA"));
28492849
add_opt(common_arg(
28502850
{"--reasoning-format"}, "FORMAT",
2851-
"reasoning format (default: deepseek; allowed values: deepseek, none, disabled)\n"
2852-
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only), 'disabled' prevents generation of thoughts (by closing any thoughts tag or setting template-specific variables such as `enable_thinking: false` for Qwen3).",
2851+
"reasoning format (default: deepseek; allowed values: deepseek, none, nothink)\n"
2852+
"controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only), 'nothink' prevents generation of thoughts (by closing any thoughts tag or setting template-specific variables such as `enable_thinking: false` for Qwen3).",
28532853
[](common_params & params, const std::string & value) {
28542854
/**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
28552855
else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
2856-
else if (value == "disabled") { params.reasoning_format = COMMON_REASONING_FORMAT_DISABLED; }
2856+
else if (value == "nothink") { params.reasoning_format = COMMON_REASONING_FORMAT_NOTHINK; }
28572857
else { std::invalid_argument("invalid value"); }
28582858
}
28592859
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));

common/chat.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -594,9 +594,9 @@ std::string common_chat_format_name(common_chat_format format) {
594594

595595
std::string common_reasoning_format_name(common_reasoning_format format) {
596596
switch (format) {
597-
case COMMON_REASONING_FORMAT_NONE: return "none";
597+
case COMMON_REASONING_FORMAT_NONE: return "none";
598598
case COMMON_REASONING_FORMAT_DEEPSEEK: return "deepseek";
599-
case COMMON_REASONING_FORMAT_DISABLED: return "disabled";
599+
case COMMON_REASONING_FORMAT_NOTHINK: return "nothink";
600600
default:
601601
throw std::runtime_error("Unknown reasoning format");
602602
}
@@ -1698,7 +1698,7 @@ static common_chat_params common_chat_templates_apply_jinja(
16981698
params.messages = common_chat_msgs_to_json_oaicompat<json>(inputs.messages, /* concat_text= */ !tmpl.original_caps().requires_typed_content);
16991699
params.add_generation_prompt = inputs.add_generation_prompt;
17001700
params.tool_choice = inputs.tool_choice;
1701-
params.enable_thinking = inputs.reasoning_format != COMMON_REASONING_FORMAT_DISABLED;
1701+
params.enable_thinking = inputs.reasoning_format != COMMON_REASONING_FORMAT_NOTHINK;
17021702
params.grammar = inputs.grammar;
17031703
params.now = inputs.now;
17041704
if (!inputs.json_schema.empty()) {

common/common.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -216,7 +216,7 @@ struct common_params_vocoder {
216216
enum common_reasoning_format {
217217
COMMON_REASONING_FORMAT_NONE,
218218
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`
219-
COMMON_REASONING_FORMAT_DISABLED, // Disable thinking (causes any thinking tag to be closed, or empty thinking tags to be inserted, depending on the model)
219+
COMMON_REASONING_FORMAT_NOTHINK, // Forcibly disables thinking (causes any thinking tag to be closed, empty thinking tags to be inserted, or template specific variables to be set, depending on the chat format)
220220
};
221221

222222
struct common_params {

tools/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -173,7 +173,7 @@ The project is under active development, and we are [looking for feedback and co
173173
| `--no-slots` | disables slots monitoring endpoint<br/>(env: LLAMA_ARG_NO_ENDPOINT_SLOTS) |
174174
| `--slot-save-path PATH` | path to save slot kv cache (default: disabled) |
175175
| `--jinja` | use jinja template for chat (default: disabled)<br/>(env: LLAMA_ARG_JINJA) |
176-
| `--reasoning-format FORMAT` | reasoning format (default: deepseek; allowed values: deepseek, none, disabled)<br/>controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only), 'disabled' prevents generation of thoughts (by closing any thoughts tag or setting template-specific variables such as `enable_thinking: false` for Qwen3).<br/>(env: LLAMA_ARG_THINK) |
176+
| `--reasoning-format FORMAT` | reasoning format (default: deepseek; allowed values: deepseek, none, nothink)<br/>controls whether thought tags are allowed and/or extracted from the response, and in which format they're returned. 'none' leaves thoughts unparsed in `message.content`, 'deepseek' puts them in `message.reasoning_content` (for DeepSeek R1 & Command R7B only), 'nothink' prevents generation of thoughts (by closing any thoughts tag or setting template-specific variables such as `enable_thinking: false` for Qwen3).<br/>(env: LLAMA_ARG_THINK) |
177177
| `--chat-template JINJA_TEMPLATE` | set custom jinja chat template (default: template taken from model's metadata)<br/>if suffix/prefix are specified, template will be disabled<br/>only commonly used templates are accepted (unless --jinja is set before this flag):<br/>list of built-in templates:<br/>bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr<br/>(env: LLAMA_ARG_CHAT_TEMPLATE) |
178178
| `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)<br/>if suffix/prefix are specified, template will be disabled<br/>only commonly used templates are accepted (unless --jinja is set before this flag):<br/>list of built-in templates:<br/>bailing, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek2, deepseek3, exaone3, falcon3, gemma, gigachat, glmedge, granite, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, phi3, phi4, rwkv-world, smolvlm, vicuna, vicuna-orca, yandex, zephyr<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) |
179179
| `--no-prefill-assistant` | whether to prefill the assistant's response if the last message is an assistant message (default: prefill enabled)<br/>when this flag is set, if the last message is an assistant message then it will be treated as a full message and not prefilled<br/>(env: LLAMA_ARG_NO_PREFILL_ASSISTANT) |

tools/server/tests/unit/test_template.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,31 +25,32 @@ def create_server():
2525
server.n_slots = 1
2626

2727

28-
@pytest.mark.parametrize("template_name,enable_thinking,expected_end", [
29-
("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", True, "<think>\n"),
30-
("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", False, "<think>\n</think>"),
28+
@pytest.mark.parametrize("tools", [None, [], [TEST_TOOL]])
29+
@pytest.mark.parametrize("template_name,nothink,expected_end", [
30+
("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", False, "<think>\n"),
31+
("deepseek-ai-DeepSeek-R1-Distill-Qwen-32B", True, "<think>\n</think>"),
3132
32-
("Qwen-Qwen3-0.6B", True, "<|im_start|>assistant\n"),
33-
("Qwen-Qwen3-0.6B", False, "<|im_start|>assistant\n<think>\n\n</think>\n\n"),
33+
("Qwen-Qwen3-0.6B", False, "<|im_start|>assistant\n"),
34+
("Qwen-Qwen3-0.6B", True, "<|im_start|>assistant\n<think>\n\n</think>\n\n"),
3435
35-
("Qwen-QwQ-32B", True, "<|im_start|>assistant\n<think>\n"),
36-
("Qwen-QwQ-32B", False, "<|im_start|>assistant\n<think>\n</think>"),
36+
("Qwen-QwQ-32B", False, "<|im_start|>assistant\n<think>\n"),
37+
("Qwen-QwQ-32B", True, "<|im_start|>assistant\n<think>\n</think>"),
3738
38-
("CohereForAI-c4ai-command-r7b-12-2024-tool_use-think", True, "<|START_THINKING|>"),
39-
("CohereForAI-c4ai-command-r7b-12-2024-tool_use-think", False, "<|START_THINKING|><|END_THINKING|>"),
39+
("CohereForAI-c4ai-command-r7b-12-2024-tool_use-think", False, "<|START_THINKING|>"),
40+
("CohereForAI-c4ai-command-r7b-12-2024-tool_use-think", True, "<|START_THINKING|><|END_THINKING|>"),
4041
])
41-
def test_enable_thinking(template_name: str, enable_thinking: bool, expected_end: str):
42+
def test_nothink(template_name: str, nothink: bool, expected_end: str, tools: list[dict]):
4243
global server
4344
server.jinja = True
44-
server.reasoning_format = 'deepseek' if enable_thinking else 'disabled'
45+
server.reasoning_format = 'nothink' if nothink else None
4546
server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
4647
server.start(timeout_seconds=TIMEOUT_SERVER_START)
4748

4849
res = server.make_request("POST", "/apply-template", data={
4950
"messages": [
5051
{"role": "user", "content": "What is today?"},
5152
],
52-
"tools": [TEST_TOOL],
53+
"tools": tools,
5354
})
5455
assert res.status_code == 200
5556
prompt = res.body["prompt"]

tools/server/tests/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ class ServerProcess:
8484
draft_max: int | None = None
8585
no_webui: bool | None = None
8686
jinja: bool | None = None
87-
reasoning_format: Literal['deepseek', 'none', 'disabled'] | None = None
87+
reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
8888
chat_template: str | None = None
8989
chat_template_file: str | None = None
9090
server_path: str | None = None

0 commit comments

Comments (0)