@@ -274,43 +274,44 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
274274
275275
276276@pytest .mark .slow
277- @pytest .mark .parametrize ("hf_repo,template_override" , [
278- ("bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M" , ("CohereForAI/c4ai-command-r7b-12-2024" , "tool_use" )),
277+ @pytest .mark .parametrize ("think, hf_repo,template_override" , [
278+ (True , "bartowski/c4ai-command-r7b-12-2024-GGUF:Q4_K_M" , ("CohereForAI/c4ai-command-r7b-12-2024" , "tool_use" )),
279279
280- ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
281- ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
280+ (False , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
281+ (False , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
282282
283- ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
284- ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
283+ (False , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
284+ (False , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
285285
286- ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
287- ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
286+ (False , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
287+ (False , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
288288
289- ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
290- ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
289+ (False , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
290+ (False , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
291291
292- ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
293- ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
292+ (False , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
293+ (False , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
294294
295- ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
296- ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
295+ (False , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
296+ (False , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
297297
298- ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
299- ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
298+ (False , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
299+ (False , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
300300
301- ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
302- ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
301+ (False , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
302+ (False , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
303303
304- ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
304+ (True , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
305305
306306 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
307- ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
307+ (False , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
308308
309309 # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
310310])
311- def test_weather (hf_repo : str , template_override : Tuple [str , str | None ] | None ):
311+ def test_weather (think : bool , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
312312 global server
313313 n_predict = 512
314+ server .think = think
314315 server .n_slots = 1
315316 server .jinja = True
316317 server .n_ctx = 8192
@@ -488,44 +489,45 @@ def test_thoughts(n_predict: int, think: bool, expect_content: str | None, expec
488489
489490
490491@pytest .mark .slow
491- @pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
492- (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
493- (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , "chatml" ),
492+ @pytest .mark .parametrize ("think, expected_arguments_override,hf_repo,template_override" , [
493+ (True , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
494+ (True , None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , "chatml" ),
494495
495- (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
496- (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
496+ (False , None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
497+ (False , None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
497498
498- (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
499- (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
499+ (False , None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
500+ (False , None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
500501
501- (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
502- ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
502+ (False , None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
503+ (False , '{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
503504
504- ('{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
505- (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
505+ (False , '{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
506+ (False , None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
506507
507- ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
508- ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
508+ (False , '{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
509+ (False , '{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
509510
510- (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
511- (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
511+ (False , None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
512+ (False , None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
512513
513- (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
514- (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
514+ (False , None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
515+ (False , None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
515516
516- (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
517- (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
517+ (False , None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
518+ (False , None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
518519
519- (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
520- (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
520+ (False , None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
521+ (False , None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
521522
522523 # Note: gemma-2-2b-it knows itself as "model", not "assistant", so we don't test the ill-suited chatml on it.
523- (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
524+ (False , None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
524525])
525- def test_hello_world_tool_call ( expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
526+ def test_hello_world ( think : bool , expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
526527 global server
527528 server .n_slots = 1
528529 server .jinja = True
530+ server .think = think
529531 server .n_ctx = 8192
530532 server .n_predict = 512 # High because of DeepSeek R1
531533 server .model_hf_repo = hf_repo
0 commit comments