@@ -58,7 +58,7 @@ def create_server():
5858 "required" :["location" ]
5959 }
6060 }
61- }# TODO: fix this crash
61+ }
6262
6363
6464def do_test_completion_with_required_tool_tiny (template_name : str , tool : dict , argument_key : str | None ):
@@ -132,8 +132,8 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
132132
133133@pytest .mark .slow
134134@pytest .mark .parametrize ("tool,argument_key,hf_repo,hf_file,template_override" , [
135- (TEST_TOOL , "success" , "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
136- (PYTHON_TOOL , "code" , "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
135+ (TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
136+ (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
137137 (TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it-Q4_K_M.gguf" , None ),
138138 (PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it-Q4_K_M.gguf" , None ),
139139 (TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct-Q4_K_M.gguf" , None ),
@@ -231,7 +231,7 @@ def test_completion_without_tool_call_fast(template_name: str, n_predict: int, t
231231@pytest .mark .slow
232232@pytest .mark .parametrize ("template_name,n_predict,tools,tool_choice" , [
233233 # TODO: fix this crash
234- # ("meetkai-functionary-medium-v3.2", 256, [], None),
234+ ("meetkai-functionary-medium-v3.2" , 256 , [], None ),
235235 ("meetkai-functionary-medium-v3.2" , 256 , [TEST_TOOL ], None ),
236236 ("meetkai-functionary-medium-v3.2" , 256 , [PYTHON_TOOL ], 'none' ),
237237 ("meetkai-functionary-medium-v3.1" , 256 , [], None ),
@@ -247,9 +247,7 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
247247
248248@pytest .mark .slow
249249@pytest .mark .parametrize ("hf_repo,hf_file,template_override" , [
250- # TODO: fix these
251- # ("lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
252- # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
250+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
253251 ("bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it-Q4_K_M.gguf" , None ),
254252 ("bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct-Q4_K_M.gguf" , None ),
255253 ("bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct-Q4_K_M.gguf" , None ),
@@ -259,6 +257,8 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
259257 ("bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2-Q8_0.gguf" , ("meetkai/functionary-medium-v3.2" , None )),
260258 ("bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct-Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
261259 ("bartowski/Llama-3.2-1B-Instruct-GGUF" , "Llama-3.2-1B-Instruct-Q4_K_M.gguf" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
260+ # TODO: fix this (times out)
261+ # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
262262])
263263def test_weather_tool_call (hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
264264 global server
@@ -276,7 +276,6 @@ def test_weather_tool_call(hf_repo: str, hf_file: str, template_override: Tuple[
276276 res = server .make_request ("POST" , "/chat/completions" , data = {
277277 "max_tokens" : 256 ,
278278 "messages" : [
279- # {"role": "system", "content": "Use tools as appropriate."},
280279 {"role" : "user" , "content" : "What is the weather in Istanbul?" },
281280 ],
282281 "tools" : [WEATHER_TOOL ],
@@ -295,21 +294,21 @@ def test_weather_tool_call(hf_repo: str, hf_file: str, template_override: Tuple[
295294
296295
297296@pytest .mark .slow
298- @pytest .mark .parametrize ("expected_arguments,hf_repo,hf_file,template_override" , [
299- # TODO: fix these
300- # ('{"code":"print("}', "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
301- # (None, "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
302- # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
297+ @pytest .mark .parametrize ("expected_arguments_override,hf_repo,hf_file,template_override" , [
298+ (None , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it-Q4_K_M.gguf" , None ),
299+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct-Q4_K_M.gguf" , None ),
303300 (None , "bartowski/functionary-small-v3.2-GGUF" , "functionary-small-v3.2-Q8_0.gguf" , ("meetkai-functionary-medium-v3.2" , None )),
301+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" , "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf" , None ),
304302 (None , "bartowski/Llama-3.2-1B-Instruct-GGUF" , "Llama-3.2-1B-Instruct-Q4_K_M.gguf" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
305303 ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF" , "Llama-3.2-3B-Instruct-Q4_K_M.gguf" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
306- (None , "bartowski/gemma-2-2b-it-GGUF" , "gemma-2-2b-it-Q4_K_M.gguf" , None ),
307- (None , "bartowski/Phi-3.5-mini-instruct-GGUF" , "Phi-3.5-mini-instruct-Q4_K_M.gguf" , None ),
308304 (None , "bartowski/Qwen2.5-7B-Instruct-GGUF" , "Qwen2.5-7B-Instruct-Q4_K_M.gguf" , None ),
305+ (None , "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF" , "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
309306 (None , "NousResearch/Hermes-3-Llama-3.1-8B-GGUF" , "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
310307 (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF" , "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf" , None ),
308+ # TODO: fix this (times out)
309+ # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
311310])
312- def test_hello_world_tool_call (expected_arguments : str | None , hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
311+ def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , hf_file : str , template_override : Tuple [str , str | None ] | None ):
313312 global server
314313 server .n_slots = 1
315314 server .jinja = True
@@ -319,15 +318,14 @@ def test_hello_world_tool_call(expected_arguments: str | None, hf_repo: str, hf_
319318 server .model_hf_file = hf_file
320319 if template_override :
321320 (template_hf_repo , template_variant ) = template_override
322- server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '' ) + ('-' + template_variant if template_variant else '' )} .jinja"
321+ server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )}.jinja"
323322 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
324323 server .start (timeout_seconds = 15 * 60 )
325324 res = server .make_request ("POST" , "/chat/completions" , data = {
326325 "max_tokens" : 256 ,
327326 "messages" : [
328327 {"role" : "system" , "content" : "You are a coding assistant." },
329328 {"role" : "user" , "content" : "say hello world with python" },
330- # {"role": "user", "content": "Print a hello world message with python"},
331329 ],
332330 "tools" : [PYTHON_TOOL ],
333331 # Note: without these greedy params, Functionary v3.2 writes `def hello_world():\n print("Hello, World!")\nhello_world()` which is correct but a pain to test.
@@ -342,8 +340,8 @@ def test_hello_world_tool_call(expected_arguments: str | None, hf_repo: str, hf_
342340 tool_call = tool_calls [0 ]
343341 assert tool_call ["function" ]["name" ] == PYTHON_TOOL ["function" ]["name" ]
344342 actual_arguments = tool_call ["function" ]["arguments" ]
345- if expected_arguments is not None :
346- assert actual_arguments == expected_arguments
343+ if expected_arguments_override is not None :
344+ assert actual_arguments == expected_arguments_override
347345 else :
348346 actual_arguments = json .loads (actual_arguments )
349347 assert 'code' in actual_arguments , f"code not found in { json .dumps (actual_arguments )} "
0 commit comments