Commit 384f54a

Author: Olivier Chafik (committed)
Split bulk of tool call tests to slow lane
1 parent 923c805 · commit 384f54a

File tree: 1 file changed (+61, −34 lines)


examples/server/tests/unit/test_tool_call.py

Lines changed: 61 additions & 34 deletions
@@ -61,28 +61,7 @@ def create_server():
 }
 
 
-@pytest.mark.parametrize("template_name,tool,argument_key", [
-    ("meta-llama-Meta-Llama-3.1-8B-Instruct", TEST_TOOL, "success"),
-    ("meta-llama-Meta-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"),
-    ("meetkai-functionary-medium-v3.1", TEST_TOOL, "success"),
-    ("meetkai-functionary-medium-v3.1", PYTHON_TOOL, "code"),
-    ("meetkai-functionary-medium-v3.2", TEST_TOOL, "success"),
-    ("meetkai-functionary-medium-v3.2", PYTHON_TOOL, "code"),
-    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", TEST_TOOL, "success"),
-    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", PYTHON_TOOL, "code"),
-    ("meta-llama-Llama-3.2-3B-Instruct", TEST_TOOL, "success"),
-    ("meta-llama-Llama-3.2-3B-Instruct", PYTHON_TOOL, "code"),
-    ("mistralai-Mistral-Nemo-Instruct-2407", TEST_TOOL, "success"),
-    ("mistralai-Mistral-Nemo-Instruct-2407", PYTHON_TOOL, "code"),
-    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", TEST_TOOL, "success"),
-    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", PYTHON_TOOL, "code"),
-    ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", TEST_TOOL, "success"),
-    ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"),
-    ("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"),
-    ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
-    # TODO: fix these
-])
-def test_completion_with_required_tool_tiny(template_name: str, tool: dict, argument_key: str | None):
+def do_test_completion_with_required_tool_tiny(template_name: str, tool: dict, argument_key: str | None):
     n_predict = 512
     global server
     # server = ServerPreset.stories15m_moe()
@@ -117,6 +96,40 @@ def test_completion_with_required_tool_tiny(template_name: str, tool: dict, argu
     assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
 
 
+@pytest.mark.parametrize("template_name,tool,argument_key", [
+    ("google-gemma-2-2b-it", TEST_TOOL, "success"),
+    ("meta-llama-Llama-3.3-70B-Instruct", TEST_TOOL, "success"),
+    ("meta-llama-Llama-3.3-70B-Instruct", PYTHON_TOOL, "code"),
+])
+def test_completion_with_required_tool_tiny_fast(template_name: str, tool: dict, argument_key: str | None):
+    do_test_completion_with_required_tool_tiny(template_name, tool, argument_key)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("template_name,tool,argument_key", [
+    ("meta-llama-Meta-Llama-3.1-8B-Instruct", TEST_TOOL, "success"),
+    ("meta-llama-Meta-Llama-3.1-8B-Instruct", PYTHON_TOOL, "code"),
+    ("meetkai-functionary-medium-v3.1", TEST_TOOL, "success"),
+    ("meetkai-functionary-medium-v3.1", PYTHON_TOOL, "code"),
+    ("meetkai-functionary-medium-v3.2", TEST_TOOL, "success"),
+    ("meetkai-functionary-medium-v3.2", PYTHON_TOOL, "code"),
+    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", TEST_TOOL, "success"),
+    ("NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use", PYTHON_TOOL, "code"),
+    ("meta-llama-Llama-3.2-3B-Instruct", TEST_TOOL, "success"),
+    ("meta-llama-Llama-3.2-3B-Instruct", PYTHON_TOOL, "code"),
+    ("mistralai-Mistral-Nemo-Instruct-2407", TEST_TOOL, "success"),
+    ("mistralai-Mistral-Nemo-Instruct-2407", PYTHON_TOOL, "code"),
+    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", TEST_TOOL, "success"),
+    ("NousResearch-Hermes-3-Llama-3.1-8B-tool_use", PYTHON_TOOL, "code"),
+    ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", TEST_TOOL, "success"),
+    ("deepseek-ai-DeepSeek-R1-Distill-Llama-8B", PYTHON_TOOL, "code"),
+    ("fireworks-ai-llama-3-firefunction-v2", TEST_TOOL, "success"),
+    ("fireworks-ai-llama-3-firefunction-v2", PYTHON_TOOL, "code"),
+])
+def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict, argument_key: str | None):
+    do_test_completion_with_required_tool_tiny(template_name, tool, argument_key)
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize("tool,argument_key,hf_repo,hf_file,template_override", [
     (TEST_TOOL, "success", "lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
@@ -183,18 +196,7 @@ def test_completion_with_required_tool_real_model(tool: dict, argument_key: str
     assert argument_key in actual_arguments, f"tool arguments: {json.dumps(actual_arguments)}, expected: {argument_key}"
 
 
-@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [
-    ("meetkai-functionary-medium-v3.1", 128, [], None),
-    ("meetkai-functionary-medium-v3.1", 128, [TEST_TOOL], None),
-    ("meetkai-functionary-medium-v3.1", 128, [PYTHON_TOOL], 'none'),
-    ("meetkai-functionary-medium-v3.2", 128, [], None),
-    ("meetkai-functionary-medium-v3.2", 128, [TEST_TOOL], None),
-    ("meetkai-functionary-medium-v3.2", 128, [PYTHON_TOOL], 'none'),
-    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [], None),
-    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [TEST_TOOL], None),
-    ("meta-llama-Meta-Llama-3.1-8B-Instruct", 128, [PYTHON_TOOL], 'none'),
-])
-def test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
+def do_test_completion_without_tool_call(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
     global server
     server.jinja = True
     server.n_predict = n_predict
@@ -217,6 +219,31 @@ def test_completion_without_tool_call(template_name: str, n_predict: int, tools:
     assert choice["message"].get("tool_calls") is None, f'Expected no tool call in {choice["message"]}'
 
 
+@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [
+    ("meta-llama-Llama-3.3-70B-Instruct", 128, [], None),
+    ("meta-llama-Llama-3.3-70B-Instruct", 128, [TEST_TOOL], None),
+    ("meta-llama-Llama-3.3-70B-Instruct", 128, [PYTHON_TOOL], 'none'),
+])
+def test_completion_without_tool_call_fast(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
+    do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice)
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("template_name,n_predict,tools,tool_choice", [
+    ("meetkai-functionary-medium-v3.1", 128, [], None),
+    ("meetkai-functionary-medium-v3.1", 128, [TEST_TOOL], None),
+    ("meetkai-functionary-medium-v3.1", 128, [PYTHON_TOOL], 'none'),
+    ("meetkai-functionary-medium-v3.2", 128, [], None),
+    ("meetkai-functionary-medium-v3.2", 128, [TEST_TOOL], None),
+    ("meetkai-functionary-medium-v3.2", 128, [PYTHON_TOOL], 'none'),
+    ("meta-llama-Llama-3.2-3B-Instruct", 128, [], None),
+    ("meta-llama-Llama-3.2-3B-Instruct", 128, [TEST_TOOL], None),
+    ("meta-llama-Llama-3.2-3B-Instruct", 128, [PYTHON_TOOL], 'none'),
+])
+def test_completion_without_tool_call_slow(template_name: str, n_predict: int, tools: list[dict], tool_choice: str | None):
+    do_test_completion_without_tool_call(template_name, n_predict, tools, tool_choice)
+
+
 @pytest.mark.slow
 @pytest.mark.parametrize("hf_repo,hf_file,template_override", [
     ("lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF", "Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", None),
