Skip to content

Commit cc18ecc

Browse files
author
CNE FICHEPOIL Pierre
committed
added tests
1 parent 6441ad4 commit cc18ecc

File tree

2 files changed

+121
-0
lines changed

2 files changed

+121
-0
lines changed

tests/test-chat.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1111,6 +1111,68 @@ static void test_template_output_parsers() {
11111111
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
11121112
}));
11131113
}
1114+
{
    // Qwen3 0.6B template: format detection, enable_thinking handling, grammar
    // contents under a REQUIRED tool choice, and parsing of reasoning + tool call.
    auto tmpls = read_templates("models/templates/Qwen-Qwen3-0.6B.jinja");
    std::vector<std::string> end_tokens{ "<|im_end|>" };

    // The template must resolve to the Hermes 2 Pro format with and without tools.
    assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
    assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);

    // enable_thinking = false: the rendered prompt must carry an empty think section.
    {
        common_chat_templates_inputs thinking_off;
        thinking_off.messages        = {message_user};
        thinking_off.tools           = tools;
        thinking_off.tool_choice     = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        thinking_off.enable_thinking = false;

        auto applied = common_chat_templates_apply(tmpls.get(), thinking_off);
        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, applied.format);

        const bool has_empty_think_block = applied.prompt.find("<think>\n\n</think>") != std::string::npos;
        assert_equals(true, has_empty_think_block);
    }

    // enable_thinking = true with a REQUIRED tool choice: the grammar must still
    // leave room for a thinking section.
    {
        common_chat_templates_inputs thinking_on;
        thinking_on.messages        = {message_user};
        thinking_on.tools           = tools;
        thinking_on.tool_choice     = COMMON_CHAT_TOOL_CHOICE_REQUIRED;
        thinking_on.enable_thinking = true;

        auto applied = common_chat_templates_apply(tmpls.get(), thinking_on);
        assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, applied.format);

        // A grammar has to be produced, and it has to mention the closing think tag.
        assert_equals(false, applied.grammar.empty());
        const bool mentions_think_close = applied.grammar.find("</think>") != std::string::npos;
        assert_equals(true, mentions_think_close);

        // Thinking ahead of the tool call: accept either a "think-" substring
        // or a literal opening tag anywhere in the grammar text.
        const bool mentions_think_rule = applied.grammar.find("think-") != std::string::npos;
        const bool mentions_think_open = applied.grammar.find("<think>") != std::string::npos;
        assert_equals(true, mentions_think_rule || mentions_think_open);
    }

    // Parsing: a complete (non-partial) output made of a think section followed
    // by a single tool call must compare equal to message_assist_call_thoughts.
    const std::string thought_then_call =
        "<think>I'm\nthinking</think>\n"
        "<tool_call>{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}</tool_call>";
    assert_msg_equals(message_assist_call_thoughts,
        common_chat_parse(
            thought_then_call,
            /* is_partial= */ false,
            {
                /* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
                /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
            }));

    // Template generation with reasoning + tool call. The delta argument is left
    // empty on purpose: the exact delta is not checked here, only that it parses.
    test_templates(tmpls.get(), end_tokens, message_assist_call_thoughts, tools,
                   "",
                   /* expect_grammar_triggered= */ true,
                   /* test_grammar_if_triggered= */ true,
                   COMMON_REASONING_FORMAT_DEEPSEEK);

    // The template must report support for the enable_thinking switch.
    assert_equals(true, common_chat_templates_support_enable_thinking(tmpls.get()));
}
11141176
{
11151177
auto tmpls = read_templates("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja");
11161178
std::vector<std::string> end_tokens{ "<|eom_id|>", "<|eot_id|>" };

tools/server/tests/unit/test_tool_call.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,3 +623,62 @@ def do_test_hello_world(server: ServerProcess, **kwargs):
623623
code = actual_arguments["code"]
624624
assert isinstance(code, str), f"Expected code to be a string, got {type(code)}: {json.dumps(code)}"
625625
assert re.match(r'''print\(("[Hh]ello,? [Ww]orld!?"|'[Hh]ello,? [Ww]orld!?')\)''', re.sub(r'#.*\n?', '', code)), f'Expected hello world, got {code}'
626+
627+
628+
629+
@pytest.mark.slow
@pytest.mark.parametrize("stream", [CompletionMode.NORMAL, CompletionMode.STREAMED])
@pytest.mark.parametrize("tool,hf_repo,template_override,reasoning_format", [
    (PYTHON_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
    (TEST_TOOL, "unsloth/Qwen3-0.6B-GGUF:Q4_K_M", None, 'deepseek'),
])
def test_required_tool_with_reasoning(tool: dict, hf_repo: str, template_override: str | Tuple[str, str | None] | None, reasoning_format: Literal['deepseek', 'none'], stream: CompletionMode):
    """Check that `tool_choice: "required"` still yields reasoning content.

    Starts the server on `hf_repo` with Jinja templating and the given
    `reasoning_format`, sends one chat completion request (streamed or not,
    depending on `stream`) that offers `tool` as the only tool, and asserts
    that the first choice carries both a non-trivial `reasoning_content` and
    exactly one call to the expected function with the expected argument key.

    `template_override` is part of the parametrization but is not read here.
    """
    global server
    token_budget = 512

    # Server configuration; everything is set before the process is started.
    server.reasoning_format = reasoning_format
    server.jinja = True
    server.n_ctx = 8192
    server.n_predict = token_budget
    server.model_hf_repo = hf_repo
    server.model_hf_file = None

    server.start(timeout_seconds=TIMEOUT_START_SLOW)

    # Request payload: greedy sampling, one tool on offer, tool use required.
    payload = {
        "max_tokens": token_budget,
        "messages": [
            {"role": "system", "content": "You are a coding assistant."},
            {"role": "user", "content": "Write an example"},
        ],
        "tool_choice": "required",
        "tools": [tool],
        "parallel_tool_calls": False,
        "stream": stream == CompletionMode.STREAMED,
        "temperature": 0.0,
        "top_k": 1,
        "top_p": 1.0,
    }
    response = server.make_any_request("POST", "/v1/chat/completions", data=payload, timeout=TIMEOUT_HTTP_REQUEST)

    message = response["choices"][0]["message"]

    # Reasoning must be present and longer than a token or two.
    reasoning_content: str | None = message.get("reasoning_content")
    assert reasoning_content is not None, 'Expected reasoning content, but got None'
    assert len(reasoning_content.strip()) > 3, 'Reasoning content is too small to be credible'

    # Exactly one tool call, targeting the function matching the offered tool.
    tool_calls = message.get("tool_calls")
    assert tool_calls and len(tool_calls) == 1, f'Expected 1 tool call in {message}'
    tool_call = tool_calls[0]
    if tool["type"] == "code_interpreter":
        expected_function_name = "python"
    else:
        expected_function_name = tool["function"]["name"]
    assert expected_function_name == tool_call["function"]["name"]

    # The arguments arrive as a JSON string; check the key specific to each tool.
    actual_arguments = json.loads(tool_call["function"]["arguments"])
    if tool is PYTHON_TOOL:
        assert "code" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'code'"
    elif tool is TEST_TOOL:
        assert "success" in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: 'success'"

0 commit comments

Comments
 (0)