@@ -67,8 +67,8 @@ def create_server():
6767
6868
6969def do_test_completion_with_required_tool_tiny (template_name : str , tool : dict , argument_key : str | None ):
70- n_predict = 512
7170 global server
71+ n_predict = 512
7272 # server = ServerPreset.stories15m_moe()
7373 server .jinja = True
7474 server .n_predict = n_predict
@@ -139,40 +139,62 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
139139@pytest .mark .parametrize ("tool,argument_key,hf_repo,template_override" , [
140140 (TEST_TOOL , "success" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
141141 (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
142+ (PYTHON_TOOL , "code" , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
143+
144+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested with it.
142145 (TEST_TOOL , "success" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
143146 (PYTHON_TOOL , "code" , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
147+
144148 (TEST_TOOL , "success" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
145149 (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
150+ (PYTHON_TOOL , "code" , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
151+
146152 (TEST_TOOL , "success" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
147153 (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
154+ (PYTHON_TOOL , "code" , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
155+
148156 (TEST_TOOL , "success" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
149157 (PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
158+ (PYTHON_TOOL , "code" , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
159+
150160 (TEST_TOOL , "success" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
151161 (PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
162+ (PYTHON_TOOL , "code" , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
163+
152164 (TEST_TOOL , "success" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
153165 (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
154- (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
155- (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
166+ (PYTHON_TOOL , "code" , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
167+
168+ (TEST_TOOL , "success" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
169+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , ("meetkai/functionary-medium-v3.2" , None )),
170+ (PYTHON_TOOL , "code" , "bartowski/functionary-small-v3.2-GGUF:Q4_K_M" , "chatml" ),
171+
156172 (TEST_TOOL , "success" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
157173 (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
174+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
175+
158176 (TEST_TOOL , "success" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
159177 (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
178+ (PYTHON_TOOL , "code" , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
160179 # TODO: fix these
161180 # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
162181 # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
163182])
164- def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
183+ def test_completion_with_required_tool_real_model (tool : dict , argument_key : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
184+ global server
165185 n_predict = 512
166186 server .n_slots = 1
167187 server .jinja = True
168188 server .n_ctx = 8192
169189 server .n_predict = n_predict
170190 server .model_hf_repo = hf_repo
171191 server .model_hf_file = None
172- if template_override :
192+ if isinstance ( template_override , tuple ) :
173193 (template_hf_repo , template_variant ) = template_override
174194 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
175195 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
196+ elif isinstance (template_override , str ):
197+ server .chat_template = template_override
176198 server .start (timeout_seconds = TIMEOUT_SERVER_START )
177199 res = server .make_request ("POST" , "/chat/completions" , data = {
178200 "max_tokens" : n_predict ,
@@ -253,17 +275,35 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
253275@pytest .mark .parametrize ("hf_repo,template_override" , [
254276 ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
255277 ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
256- ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
278+ ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
279+
257280 ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
281+ ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
282+
258283 ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
259- ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
260- ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
284+ ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
285+
286+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
287+ ("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
288+
289+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-3-Llama-3.1-8B" , "tool_use" )),
290+ ("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
291+
261292 ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
293+ ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
294+
262295 ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai/functionary-medium-v3.2" , None )),
296+ ("bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
297+
263298 ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama/Llama-3.2-3B-Instruct" , None )),
299+ ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
300+
301+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested with it.
302+ ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
303+
264304 # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
265305])
266- def test_weather_tool_call (hf_repo : str , template_override : Tuple [str , str | None ] | None ):
306+ def test_weather_tool_call (hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
267307 global server
268308 n_predict = 512
269309 server .n_slots = 1
@@ -272,10 +312,12 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
272312 server .n_predict = n_predict
273313 server .model_hf_repo = hf_repo
274314 server .model_hf_file = None
275- if template_override :
315+ if isinstance ( template_override , tuple ) :
276316 (template_hf_repo , template_variant ) = template_override
277317 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
278318 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
319+ elif isinstance (template_override , str ):
320+ server .chat_template = template_override
279321 server .start (timeout_seconds = TIMEOUT_SERVER_START )
280322 res = server .make_request ("POST" , "/chat/completions" , data = {
281323 "max_tokens" : n_predict ,
@@ -301,29 +343,52 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
301343@pytest .mark .slow
302344@pytest .mark .parametrize ("expected_arguments_override,hf_repo,template_override" , [
303345 (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , None ),
304- (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
346+ (None , "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M" , "chatml" ),
347+
305348 (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , None ),
349+ (None , "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M" , "chatml" ),
350+
306351 (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , ("meetkai-functionary-medium-v3.2" , None )),
307- ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
308- (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
352+ (None , "bartowski/functionary-small-v3.2-GGUF:Q8_0" , "chatml" ),
353+
354+ (None , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , None ),
355+ ('{"code":"print("}' , "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M" , "chatml" ),
356+
357+ ('{"code":"print("}' , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
358+ (None , "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M" , "chatml" ),
359+
309360 ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , ("meta-llama-Llama-3.2-3B-Instruct" , None )),
361+ ('{"code":"print("}' , "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M" , "chatml" ),
362+
310363 (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , None ),
311- (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
312- (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
364+ (None , "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M" , "chatml" ),
365+
366+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , ("NousResearch/Hermes-2-Pro-Llama-3-8B" , "tool_use" )),
367+ (None , "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M" , "chatml" ),
368+
369+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , ("NousResearch-Hermes-3-Llama-3.1-8B" , "tool_use" )),
370+ (None , "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M" , "chatml" ),
371+
313372 (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , None ),
373+ (None , "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M" , "chatml" ),
374+
375+ # Note: gemma-2-2b-it refers to its own role as "model" rather than "assistant", so the chatml template is ill-suited to it and is deliberately not tested with it.
376+ (None , "bartowski/gemma-2-2b-it-GGUF:Q4_K_M" , None ),
314377])
315- def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : Tuple [str , str | None ] | None ):
378+ def test_hello_world_tool_call (expected_arguments_override : str | None , hf_repo : str , template_override : str | Tuple [str , str | None ] | None ):
316379 global server
317380 server .n_slots = 1
318381 server .jinja = True
319382 server .n_ctx = 8192
320383 server .n_predict = 512 # High because of DeepSeek R1
321384 server .model_hf_repo = hf_repo
322385 server .model_hf_file = None
323- if template_override :
386+ if isinstance ( template_override , tuple ) :
324387 (template_hf_repo , template_variant ) = template_override
325388 server .chat_template_file = f"../../../models/templates/{ template_hf_repo .replace ('/' , '-' ) + ('-' + template_variant if template_variant else '' )} .jinja"
326389 assert os .path .exists (server .chat_template_file ), f"Template file { server .chat_template_file } does not exist. Run `python scripts/get_chat_template.py { template_hf_repo } { template_variant } > { server .chat_template_file } ` to download the template."
390+ elif isinstance (template_override , str ):
391+ server .chat_template = template_override
327392 server .start (timeout_seconds = TIMEOUT_SERVER_START )
328393 res = server .make_request ("POST" , "/chat/completions" , data = {
329394 "max_tokens" : 256 ,
0 commit comments