Skip to content

Commit 9685043

Browse files
author
Olivier Chafik
committed
Update scripts/fetch_server_test_models.py to new compact hf_repo syntax + switch Hermes models
1 parent 0c171f5 commit 9685043

File tree

2 files changed

+25
-16
lines changed

2 files changed

+25
-16
lines changed

examples/server/tests/unit/test_tool_call.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ def test_completion_with_required_tool_tiny_slow(template_name: str, tool: dict,
143143
(PYTHON_TOOL, "code", "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
144144
(TEST_TOOL, "success", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
145145
(PYTHON_TOOL, "code", "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
146-
(TEST_TOOL, "success", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
147-
(PYTHON_TOOL, "code", "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
148-
(TEST_TOOL, "success", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
149-
(PYTHON_TOOL, "code", "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
146+
(TEST_TOOL, "success", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
147+
(PYTHON_TOOL, "code", "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
148+
(TEST_TOOL, "success", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
149+
(PYTHON_TOOL, "code", "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
150150
(TEST_TOOL, "success", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
151151
(PYTHON_TOOL, "code", "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
152152
(TEST_TOOL, "success", "bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
@@ -252,8 +252,8 @@ def test_completion_without_tool_call_slow(template_name: str, n_predict: int, t
252252
("bartowski/gemma-2-2b-it-GGUF:Q4_K_M", None),
253253
("bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M", None),
254254
("bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
255-
("NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
256-
("NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
255+
("bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
256+
("bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-3-Llama-3.1-8B", "tool_use")),
257257
("bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
258258
("bartowski/functionary-small-v3.2-GGUF:Q8_0", ("meetkai/functionary-medium-v3.2", None)),
259259
("bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama/Llama-3.2-3B-Instruct", None)),
@@ -301,8 +301,8 @@ def test_weather_tool_call(hf_repo: str, template_override: Tuple[str, str | Non
301301
(None, "bartowski/Llama-3.2-1B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
302302
('{"code":"print("}', "bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M", ("meta-llama-Llama-3.2-3B-Instruct", None)),
303303
(None, "bartowski/Qwen2.5-7B-Instruct-GGUF:Q4_K_M", None),
304-
(None, "NousResearch/Hermes-2-Pro-Llama-3-8B:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
305-
(None, "NousResearch/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
304+
(None, "bartowski/Hermes-2-Pro-Llama-3-8B-GGUF:Q4_K_M", ("NousResearch/Hermes-2-Pro-Llama-3-8B", "tool_use")),
305+
(None, "bartowski/Hermes-3-Llama-3.1-8B-GGUF:Q4_K_M", ("NousResearch-Hermes-3-Llama-3.1-8B", "tool_use")),
306306
(None, "bartowski/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M", None),
307307
# (None, "bartowski/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M", None),
308308
])

scripts/fetch_server_test_models.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,13 @@
1616
import os
1717
from typing import Generator
1818
from pydantic import BaseModel
19+
from typing import *
1920
import subprocess
2021

2122

2223
class HuggingFaceModel(BaseModel):
2324
hf_repo: str
24-
hf_file: str
25+
hf_file: Optional[str] = None
2526

2627
class Config:
2728
frozen = True
@@ -40,7 +41,7 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
4041
for dec in node.decorator_list:
4142
if isinstance(dec, ast.Call) and isinstance(dec.func, ast.Attribute) and dec.func.attr == 'parametrize':
4243
param_names = ast.literal_eval(dec.args[0]).split(",")
43-
if "hf_repo" not in param_names or "hf_file" not in param_names:
44+
if "hf_repo" not in param_names:
4445
continue
4546

4647
raw_param_values = dec.args[1]
@@ -49,15 +50,15 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
4950
continue
5051

5152
hf_repo_idx = param_names.index("hf_repo")
52-
hf_file_idx = param_names.index("hf_file")
53+
hf_file_idx = param_names.index("hf_file") if "hf_file" in param_names else None
5354

5455
for t in raw_param_values.elts:
5556
if not isinstance(t, ast.Tuple):
5657
logging.warning(f'Skipping non-tuple parametrize entry at {test_file}:{node.lineno}')
5758
continue
5859
yield HuggingFaceModel(
5960
hf_repo=ast.literal_eval(t.elts[hf_repo_idx]),
60-
hf_file=ast.literal_eval(t.elts[hf_file_idx]))
61+
hf_file=ast.literal_eval(t.elts[hf_file_idx]) if hf_file_idx is not None else None)
6162

6263

6364
if __name__ == '__main__':
@@ -80,14 +81,22 @@ def collect_hf_model_test_parameters(test_file) -> Generator[HuggingFaceModel, N
8081
'../build/bin/Release/llama-cli.exe' if os.name == 'nt' else '../build/bin/llama-cli'))
8182

8283
for m in models:
83-
if '<' in m.hf_repo or '<' in m.hf_file:
84+
if '<' in m.hf_repo or (m.hf_file is not None and '<' in m.hf_file):
8485
continue
85-
if '-of-' in m.hf_file:
86+
if m.hf_file is not None and '-of-' in m.hf_file:
8687
logging.warning(f'Skipping model at {m.hf_repo} / {m.hf_file} because it is a split file')
8788
continue
8889
logging.info(f'Using llama-cli to ensure model {m.hf_repo}/{m.hf_file} was fetched')
89-
cmd = [cli_path, '-hfr', m.hf_repo, '-hff', m.hf_file, '-n', '1', '-p', 'Hey', '--no-warmup', '--log-disable', '-no-cnv']
90-
if m.hf_file != 'tinyllamas/stories260K.gguf' and not m.hf_file.startswith('Mistral-Nemo'):
90+
cmd = [
91+
cli_path,
92+
'-hfr', m.hf_repo,
93+
*([] if m.hf_file is None else ['-hff', m.hf_file]),
94+
'-n', '1',
95+
'-p', 'Hey',
96+
'--no-warmup',
97+
'--log-disable',
98+
'-no-cnv']
99+
if m.hf_file != 'tinyllamas/stories260K.gguf' and 'Mistral-Nemo' not in m.hf_repo:
91100
cmd.append('-fa')
92101
try:
93102
subprocess.check_call(cmd)

0 commit comments

Comments (0)