@@ -345,20 +345,20 @@ def test_weather_tool_call(hf_repo: str, template_override: str | Tuple[str, str
345345
346346@pytest .mark .slow
347347@pytest .mark .parametrize ("result_override,n_predict,hf_repo,template_override" , [
348- (None , 128 , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
349- (None , 128 , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
350- (None , 128 , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
351- (None , 128 , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
352- (None , 128 , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
353- (None , 128 , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
354- (None , 128 , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
355- (None , 128 , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
356- ("^> 0.56$" , 128 , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
348+ (None , 128 , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
349+ (None , 128 , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
350+ (None , 128 , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
351+ (None , 128 , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
352+ (None , 128 , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
353+ (None , 128 , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
354+ (None , 128 , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
355+ (None , 128 , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
356+ ("^> 0.56$" , 128 , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
357357
358358 # TODO: fix these (wrong results, either didn't respect decimal instruction or got wrong value)
359- ("[\\ s\\ S\\ r\\ n]*?\\ b0\\ .55644242476$" , 128 , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
360- ("[\\ s\\ S\\ r\\ n]*?which equals 0\\ .5\\ ." , 8192 , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
361- ("** Answer:* * 0\\ .25\\ b" , 8192 , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , ("llama-cpp-deepseek-r1" , None )),
359+ ("[\\ s\\ S\\ r\\ n]*?\\ b0\\ .55644242476$" , 128 , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
360+ ("[\\ s\\ S\\ r\\ n]*?which equals 0\\ .5\\ ." , 8192 , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
361+ ("[ \\ s \\ S \\ r \\ n]*? \\ * \\ * Answer:\\ * \\ * 0\\ .25\\ b" , 8192 , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , ("llama-cpp-deepseek-r1" , None )),
362362])
363363def test_calc_result (result_override : str | None , n_predict : int , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
364364 global server
@@ -435,6 +435,46 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
435435 f'Expected something like "The y coordinate is 0.56.", got { content } '
436436
437437
@pytest.mark.slow
@pytest.mark.parametrize("n_predict,expect_content,expect_thoughts,hf_repo,template_override", [
    (128, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
    (1024, "To find the sum of.*", "I need to calculate the sum of 102 and 7.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
    (1024, "To find the sum of.*", "First, I need to add the tens place.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", ("llama-cpp-deepseek-r1", None)),
])
def test_thoughts(n_predict: int, expect_content: str | None, expect_thoughts: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
    """Ask a plain arithmetic question and check the reply's content and thoughts.

    Starts the server with the given model, posts a single user message to
    ``/chat/completions`` and asserts that the response has status 200, carries
    no ``tool_calls``, and that the message's ``content`` / ``thoughts`` fields
    match the expected patterns from their beginning (``re.match``).

    Args:
        n_predict: Used both as ``server.n_predict`` and as the request's
            ``max_tokens``.
        expect_content: Regex the ``content`` field must match, or ``None`` to
            skip the check.
        expect_thoughts: Regex the ``thoughts`` field must match, or ``None`` to
            skip the check.
        hf_repo: Value assigned to ``server.model_hf_repo``.
        template_override: ``None`` to leave the template settings untouched, a
            string assigned to ``server.chat_template``, or a
            ``(template_hf_repo, template_variant)`` tuple resolved to a
            ``.jinja`` file under ``../../../models/templates/``.
    """
    global server
    server.n_slots = 1
    server.jinja = True
    server.n_ctx = 8192 * 2
    server.n_predict = n_predict
    server.model_hf_repo = hf_repo
    server.model_hf_file = None

    if isinstance(template_override, tuple):
        template_hf_repo, template_variant = template_override
        # File name is the repo id with '/' replaced by '-', plus an optional '-<variant>' suffix.
        template_name = template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')
        server.chat_template_file = f"../../../models/templates/{template_name}.jinja"
        assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
    elif isinstance(template_override, str):
        server.chat_template = template_override

    server.start(timeout_seconds=TIMEOUT_SERVER_START)
    res = server.make_request("POST", "/chat/completions", data={
        "max_tokens": n_predict,
        "messages": [
            {"role": "user", "content": "What's the sum of 102 and 7?"},
        ]
    }, timeout=TIMEOUT_HTTP_REQUEST)
    assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"

    message = res.body["choices"][0]["message"]
    assert message.get("tool_calls") is None, f'Expected no tool call in {message}'

    # Guard against a missing field first: re.match(pattern, None) raises
    # TypeError, which would hide the assertion message below.
    content = message.get("content")
    if expect_content is not None:
        assert content is not None and re.match(expect_content, content), f'Expected {expect_content}, got {content}'

    thoughts = message.get("thoughts")
    if expect_thoughts is not None:
        assert thoughts is not None and re.match(expect_thoughts, thoughts), f'Expected {expect_thoughts}, got {thoughts}'
476+
477+
438478@pytest .mark .slow
439479@pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
440480 (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
0 commit comments