@@ -274,44 +274,44 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
274274
275275
276276@pytest .mark .slow
277- @pytest .mark .parametrize ("think ,hf_repo,template_override" , [
278- (True , "bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M" , ("CohereForAI/c4ai-command-r7b-12-2024" , "tool_use" )),
277+ @pytest .mark .parametrize ("reasoning_format ,hf_repo,template_override" , [
278+ ('deepseek' , "bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M" , ("CohereForAI/c4ai-command-r7b-12-2024" , "tool_use" )),
279279
280- (False , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
281- (False , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
280+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
281+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
282282
283- (False , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
284- (False , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
283+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
284+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
285285
286- (False , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
287- (False , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
286+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
287+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
288288
289- (False , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
290- (False , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
289+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
290+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
291291
292- (False , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
293- (False , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
292+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
293+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
294294
295- (False , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
296- (False , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
295+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
296+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
297297
298- (False , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
299- (False , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
298+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
299+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
300300
301- (False , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
302- (False , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
301+ (None , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
302+ (None , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
303303
304- (True , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
304+ ('deepseek' , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
305305
306306 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
307- (False , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
307+ (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
308308
309309 # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
310310])
311- def test_weather (think : bool , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
311+ def test_weather (reasoning_format : Literal [ 'deepseek' , 'none' ] | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
312312 global server
313313 n_predict = 512
314- server .think = think
314+ server .reasoning_format = reasoning_format
315315 server .n_slots = 1
316316 server .jinja = True
317317 server .n_ctx = 8192
@@ -440,19 +440,19 @@ def test_calc_result(result_override: str | None, n_predict: int, hf_repo: str,
440440
441441
442442@pytest .mark .slow
443- @pytest .mark .parametrize ("n_predict,think ,expect_content,expect_reasoning_content,hf_repo,template_override" , [
444- (1024 , True , "^The sum of 102 and 7 is 109.*" , "^The user's request is straightforward.*" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
445- (128 , False , "^The sum of 102 and 7 is 109.*" , None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
443+ @pytest .mark .parametrize ("n_predict,reasoning_format ,expect_content,expect_reasoning_content,hf_repo,template_override" , [
444+ # (1024, 'deepseek' , "^The sum of 102 and 7 is 109.*", "^The user's request is straightforward.*", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
445+ # (128, None, "^The sum of 102 and 7 is 109.*", None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
446446
447- (1024 , True , "To find the sum of.*" , "I need to calculate the sum of 102 and 7.*" , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
448- (1024 , False , "<think>\n I need[\\ s\\ S\\ r\\ n]*?</think>\n To find.*" , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
447+ (1024 , 'deepseek' , "To find the sum of.*" , "I need to calculate the sum of 102 and 7.*" , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
448+ (1024 , 'none' , "<think>\n I need[\\ s\\ S\\ r\\ n]*?</think>\n To find.*" , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
449449
450- (1024 , True , "To find the sum of.*" , "First, I need to add the tens place.*" , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , ("llama-cpp-deepseek-r1" , None )),
450+ (1024 , 'deepseek' , "To find the sum of.*" , "First, I need to add the tens place.*" , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , ("llama-cpp-deepseek-r1" , None )),
451451])
452- def test_thoughts (n_predict : int , think : bool , expect_content : str | None , expect_reasoning_content : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
452+ def test_thoughts (n_predict : int , reasoning_format : Literal [ 'deepseek' , 'none' ] | None , expect_content : str | None , expect_reasoning_content : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
453453 global server
454454 server .n_slots = 1
455- server .think = think
455+ server .reasoning_format = reasoning_format
456456 server .jinja = True
457457 server .n_ctx = 8192 * 2
458458 server .n_predict = n_predict
@@ -489,45 +489,44 @@ def test_thoughts(n_predict: int, think: bool, expect_content: str | None, expec
489489
490490
491491@pytest .mark .slow
492- @pytest .mark .parametrize ("think, expected_arguments_override,hf_repo,template_override" , [
493- (True , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
494- (True , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , "chatml" ),
492+ @pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
493+ (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
494+ (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , "chatml" ),
495495
496- (False , None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
497- (False , None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
496+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
497+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
498498
499- (False , None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
500- (False , None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
499+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
500+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
501501
502- (False , None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
503- (False , '{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
502+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
503+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
504504
505- (False , '{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
506- (False , None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
505+ ('{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
506+ (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
507507
508- (False , '{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
509- (False , '{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
508+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
509+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
510510
511- (False , None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
512- (False , None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
511+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
512+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
513513
514- (False , None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
515- (False , None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
514+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
515+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
516516
517- (False , None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
518- (False , None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
517+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
518+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
519519
520- (False , None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
521- (False , None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
520+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
521+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
522522
523523 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
524- (False , None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
524+ (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
525525])
526- def test_hello_world (think : bool , expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
526+ def test_hello_world(expected_arguments_override: str | None, hf_repo: str, template_override: str | Tuple[str, str | None] | None):
527527 global server
528528 server .n_slots = 1
529529 server .jinja = True
530- server .think = think
531530 server .n_ctx = 8192
532531 server .n_predict = 512 # High because of DeepSeek R1
533532 server .model_hf_repo = hf_repo
0 commit comments