 
 server: ServerProcess
 
+TIMEOUT_SERVER_START = 15 * 60
+TIMEOUT_HTTP_REQUEST = 60
+
 @pytest.fixture(autouse=True)
 def create_server():
     global server
@@ -107,8 +110,8 @@ def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict,
 
 @pytest.mark.slow
 @pytest.mark.parametrize("template_name,tool,argument_key", [
-    ("meta-llama-Llama-3.1-8B-Instruct", TEST_TOOL, "success"),
-    ("meta-llama-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"),
+    ("meta-llama-Llama-3.1-8B-Instruct", TEST_TOOL, "success"),
+    ("meta-llama-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"),
     ("meetkai-functionary-medium-v3.1", TEST_TOOL, "success"),
     ("meetkai-functionary-medium-v3.1", PYTHON_TOOL, "code"),
     ("meetkai-functionary-medium-v3.2", TEST_TOOL, "success"),
@@ -131,44 +134,43 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize("tool,argument_key,hf_repo,hf_file,template_override", [
-    (TEST_TOOL, "success", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
-    (PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
-    (TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
-    (PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
-    (TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
-    (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
-    (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
-    (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
-    (TEST_TOOL, "success", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (TEST_TOOL, "success", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
-    (PYTHON_TOOL, "code", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
-    (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", None),
-    (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", None),
-    (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai/functionary-medium-v3.2", None)),
-    (PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai/functionary-medium-v3.2", None)),
-    (TEST_TOOL, "success", "bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
-    (PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
-    (TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
-    (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+@pytest.mark.parametrize("tool,argument_key,hf_repo,template_override", [
+    (TEST_TOOL, "success", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+    (PYTHON_TOOL, "code", "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+    (TEST_TOOL, "success", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
+    (PYTHON_TOOL, "code", "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
+    (TEST_TOOL, "success", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+    (PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+    (TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
+    (PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
+    (TEST_TOOL, "success", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (TEST_TOOL, "success", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (PYTHON_TOOL, "code", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    (TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
+    (PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
+    (TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
+    (PYTHON_TOOL, "code", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
+    (TEST_TOOL, "success", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+    (PYTHON_TOOL, "code", "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+    (TEST_TOOL, "success", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+    (PYTHON_TOOL, "code", "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
     # TODO: fix these
-    # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
-    # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
+    # (TEST_TOOL, "success", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
+    # (PYTHON_TOOL, "code", "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
-def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, hf_file: str, template_override: Tuple[str, str | None] | None):
+def test_completion_with_required_tool_real_model(tool: dict, argument_key: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None):
     n_predict = 512
     server.n_slots = 1
     server.jinja = True
     server.n_ctx = 8192
     server.n_predict = n_predict
     server.model_hf_repo = hf_repo
-    server.model_hf_file = hf_file
     if template_override:
         (template_hf_repo, template_variant) = template_override
         server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
         assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
-    server.start()
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": n_predict,
         "messages": [
@@ -181,7 +183,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
         "temperature": 0.0,
         "top_k": 1,
         "top_p": 1.0,
-    })
+    }, timeout=TIMEOUT_HTTP_REQUEST)
     assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
     choice = res.body["choices"][0]
     tool_calls = choice["message"].get("tool_calls")
@@ -201,7 +203,7 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
     server.jinja = True
     server.n_predict = n_predict
     server.chat_template_file = f'../../../models/templates/{template_name}.jinja'
-    server.start()
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": n_predict,
         "messages": [
@@ -213,7 +215,7 @@ def do_test_completion_without_tool_call(template_name: str, n_predict: int, too
         "temperature": 0.0,
         "top_k": 1,
         "top_p": 1.0,
-    })
+    }, timeout=TIMEOUT_HTTP_REQUEST)
     assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
     choice = res.body["choices"][0]
     assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}'
@@ -245,39 +247,38 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize("hf_repo,hf_file,template_override", [
-    ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
-    ("bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
-    ("bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
-    ("bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
-    ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
-    ("bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", None),
-    ("bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai/functionary-medium-v3.2", None)),
-    ("bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
-    # ("bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama/Llama-3.2-3B-Instruct", None)),
-    # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
+@pytest.mark.parametrize("hf_repo,template_override", [
+    ("bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+    ("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
+    ("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+    ("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
+    ("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    ("NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
+    ("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
+    ("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
+    ("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+    # ("bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
+    # ("bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
-def test_weather_tool_call(hf_repo: str, hf_file: str, template_override: Tuple[str, str | None] | None):
+def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | None] | None):
     global server
     server.n_slots = 1
     server.jinja = True
     server.n_ctx = 8192
     server.n_predict = 512
     server.model_hf_repo = hf_repo
-    server.model_hf_file = hf_file
     if template_override:
         (template_hf_repo, template_variant) = template_override
         server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
         assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
-    server.start(timeout_seconds=15 * 60)
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": 256,
         "messages": [
             {"role": "user", "content": "What is the weather in Istanbul?"},
         ],
         "tools": [WEATHER_TOOL],
-    })
+    }, timeout=TIMEOUT_HTTP_REQUEST)
     assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
     choice = res.body["choices"][0]
     tool_calls = choice["message"].get("tool_calls")
@@ -292,32 +293,31 @@ def test_weather_tool_call(hf_repo: str, hf_file: str, template_override: Tuple[
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize("expected_arguments_override,hf_repo,hf_file,template_override", [
-    (None, "bartowski/gemma-2-2b-it-GGUF", "gemma-2-2b-it-Q4_K_M.gguf", None),
-    (None, "bartowski/Phi-3.5-mini-instruct-GGUF", "Phi-3.5-mini-instruct-Q4_K_M.gguf", None),
-    (None, "bartowski/functionary-small-v3.2-GGUF", "functionary-small-v3.2-Q8_0.gguf", ("meetkai-functionary-medium-v3.2", None)),
-    ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
-    (None, "bartowski/Llama-3.2-1B-Instruct-GGUF", "Llama-3.2-1B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
-    ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF", "Llama-3.2-3B-Instruct-Q4_K_M.gguf", ("meta-llama-Llama-3.2-3B-Instruct", None)),
-    (None, "bartowski/Qwen2.5-7B-Instruct-GGUF", "Qwen2.5-7B-Instruct-Q4_K_M.gguf", None),
-    (None, "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF", "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
-    (None, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF", "Hermes-3-Llama-3.1-8B.Q4_K_M.gguf", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
-    (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF", "Mistral-Nemo-Instruct-2407-Q4_K_M.gguf", None),
-    # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF", "DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf", None),
+@pytest.mark.parametrize("expected_arguments_override,hf_repo,template_override", [
+    (None, "bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
+    (None, "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
+    (None, "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai-functionary-medium-v3.2", None)),
+    ('{"code":"print("}', "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M", None),
+    (None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
+    ('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
+    (None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
+    (None, "NousResearch/Hermes-2-Pro-Llama-3-8B:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
+    (None, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
+    (None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
+    # (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
 ])
-def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, hf_file: str, template_override: Tuple[str, str | None] | None):
+def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo: str, template_override: Tuple[str, str | None] | None):
     global server
     server.n_slots = 1
     server.jinja = True
     server.n_ctx = 8192
     server.n_predict = 128
     server.model_hf_repo = hf_repo
-    server.model_hf_file = hf_file
     if template_override:
         (template_hf_repo, template_variant) = template_override
         server.chat_template_file = f"../../../models/templates/{template_hf_repo.replace('/', '-') + ('-' + template_variant if template_variant else '')}.jinja"
         assert os.path.exists(server.chat_template_file), f"Template file {server.chat_template_file} does not exist. Run `python scripts/get_chat_template.py {template_hf_repo} {template_variant} > {server.chat_template_file}` to download the template."
-    server.start(timeout_seconds=15 * 60)
+    server.start(timeout_seconds=TIMEOUT_SERVER_START)
     res = server.make_request("POST", "/chat/completions", data={
         "max_tokens": 256,
         "messages": [
@@ -329,7 +329,7 @@ def test_hello_world_tool_call(expected_arguments_override: str | None, hf_repo:
         "temperature": 0.0,
         "top_k": 1,
         "top_p": 1.0,
-    })
+    }, timeout=TIMEOUT_HTTP_REQUEST)
     assert res.status_code == 200, f"Expected status code 200, got {res.status_code}"
     choice = res.body["choices"][0]
     tool_calls = choice["message"].get("tool_calls")