@@ -274,44 +274,43 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
274274
275275
276276@pytest.mark.slow
277- @pytest.mark.parametrize("reasoning_format,hf_repo,template_override", [
278- ('deepseek', "bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
277+ @pytest.mark.parametrize("hf_repo,template_override", [
278+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
279+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
279280
280- (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
281- (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
281+ ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
282+ ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
282283
283- (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
284- (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
284+ ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
285+ ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
285286
286- (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
287- (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
287+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
288+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
288289
289- (None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
290- (None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", "chatml"),
290+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
291+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
291292
292- (None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
293- (None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", "chatml"),
293+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
294+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
294295
295- (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
296- (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", "chatml"),
296+ ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
297+ ("bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
297298
298- (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
299- (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
299+ ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
300+ ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
300301
301- (None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
302- (None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
302+ ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q6_K_L", ("CohereForAI/c4ai-command-r7b-12-2024", "tool_use")),
303303
304- ('deepseek', "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
304+ ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
305305
306306 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
307- (None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
307+ ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
308308
309309 # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
310310])
311- def test_weather(reasoning_format: Literal['deepseek', 'none'] | None, hf_repo: str, template_override: Tuple[str, str | None] | None):
311+ def test_weather(hf_repo: str, template_override: str | Tuple[str, str | None] | None):
312312 global server
313313 n_predict = 512
314- server.reasoning_format = reasoning_format
315314 server.n_slots = 1
316315 server.jinja = True
317316 server.n_ctx = 8192
@@ -441,8 +440,8 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
441440
442441@pytest.mark.slow
443442@pytest.mark.parametrize("n_predict,reasoning_format,expect_content,expect_reasoning_content,hf_repo,template_override", [
444- # (1024, 'deepseek', "^The sum of 102 and 7 is 109.*", "^The user's request is straightforward.*", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
445- # (128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
443+ (1024, 'deepseek', "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
444+ (128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
446445
447446 (1024, 'deepseek', "To find the sum of.*", "I need to calculate the sum of 102 and 7.*", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
448447 (1024, 'none', "<think>\nI need[\\s\\S\\r\\n]*?</think>\nTo find.*", None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
@@ -491,22 +490,22 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
491490@pytest.mark.slow
492491@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
493492 (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
494- (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", "chatml"),
493+ # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", "chatml"),
495494
496495 (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
497496 (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", "chatml"),
498497
499498 (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
500499 (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", "chatml"),
501500
502- (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
503- ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
501+ ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
502+ (None, "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", "chatml"),
504503
505- ('{"code":"print("}', "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
504+ (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
506505 (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", "chatml"),
507506
508507 ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
509- ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
508+ (None, "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", "chatml"),
510509
511510 (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
512511 (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", "chatml"),
@@ -523,7 +522,7 @@ def test_thoughts(n_predict: int, reasoning_format: Literal['deepseek', 'none']
523522 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
524523 (None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
525524])
526- def test_hello_world(reasoning_format: Literal['deepseek', 'none'] | None, expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
525+ def test_hello_world(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
527526 global server
528527 server.n_slots = 1
529528 server.jinja = True
0 commit comments