add CI test

ngxson · ngxson · commit 2f30530dc118 · 2025-05-06T23:37:56.000+02:00
diff --git a/tools/server/tests/unit/test_vision_api.py b/tools/server/tests/unit/test_vision_api.py
@@ -0,0 +1,56 @@
+import pytest
+from utils import *
+import base64
+import requests
+
+server: ServerProcess
+
+IMG_URL_0 = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/test/11_truck.png"
+IMG_URL_1 = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/test/91_cat.png"
+
+response = requests.get(IMG_URL_0, timeout=60) # runs at import time: without a timeout a stalled download would hang test collection forever
+response.raise_for_status() # Raise an exception for bad status codes
+IMG_BASE64_0 = "data:image/png;base64," + base64.b64encode(response.content).decode("utf-8")
+
+
+@pytest.fixture(autouse=True)
+def create_server():
+    global server # rebind the module-level handle that the tests in this file use
+    server = ServerPreset.tinygemma3() # new preset for every test; the test itself calls start()
+
+
+@pytest.mark.parametrize(
+    "image_url, success, re_content",
+    [
+        # the test model is trained on CIFAR-10 but is tiny, so its answers can be
+        # wrong (the truck image is expected to come back as "cat")
+        (IMG_URL_0, True, "(cat)+"),
+        (IMG_BASE64_0, True, "(cat)+"),
+        (IMG_URL_1, True, "(frog)+"),
+        ("malformed", False, None),
+        ("https://google.com/404", False, None),  # non-existent image
+        ("https://ggml.ai", False, None),  # non-image data
+    ],
+)
+def test_vision_chat_completion(image_url, success, re_content):
+    """Send one text+image user turn; expect a matching reply or a non-200 status."""
+    global server
+    # vision model may take longer to load due to download size
+    server.start(timeout_seconds=60)
+    text_part = {"type": "text", "text": "What is this:\n"}
+    image_part = {"type": "image_url", "image_url": {"url": image_url}}
+    payload = {
+        "temperature": 0.0,
+        "top_k": 1,
+        "messages": [{"role": "user", "content": [text_part, image_part]}],
+    }
+    res = server.make_request("POST", "/chat/completions", data=payload)
+    if not success:
+        # unusable image sources must produce an error status, not a completion
+        assert res.status_code != 200
+        return
+    assert res.status_code == 200
+    message = res.body["choices"][0]["message"]
+    assert message["role"] == "assistant"
+    assert match_regex(re_content, message["content"])
+
diff --git a/tools/server/tests/utils.py b/tools/server/tests/utils.py
@@ -88,6 +88,7 @@ class ServerProcess:
     chat_template: str | None = None
     chat_template_file: str | None = None
     server_path: str | None = None
+    mmproj_url: str | None = None
 
     # session variables
     process: subprocess.Popen | None = None
@@ -194,6 +195,8 @@ def start(self, timeout_seconds: int | None = DEFAULT_HTTP_TIMEOUT) -> None:
             server_args.extend(["--chat-template", self.chat_template])
         if self.chat_template_file:
             server_args.extend(["--chat-template-file", self.chat_template_file])
+        if self.mmproj_url:
+            server_args.extend(["--mmproj-url", self.mmproj_url])
 
         args = [str(arg) for arg in [server_path, *server_args]]
         print(f"tests: starting server with: {' '.join(args)}")
@@ -379,6 +382,21 @@ def jina_reranker_tiny() -> ServerProcess:
         server.server_reranking = True
         return server
 
+    @staticmethod
+    def tinygemma3() -> ServerProcess:
+        preset = ServerProcess()
+        # NOTE(review): an earlier note said the HF registry API already provides mmproj, yet it is set explicitly below - confirm which holds
+        preset.model_alias = "tinygemma3"
+        preset.model_hf_repo = "ggml-org/tinygemma3-GGUF"
+        preset.model_hf_file = "tinygemma3-Q8_0.gguf"
+        preset.mmproj_url = "https://huggingface.co/ggml-org/tinygemma3-GGUF/resolve/main/mmproj-tinygemma3.gguf"
+        preset.seed = 42
+        preset.n_predict = 4
+        preset.n_slots = 2
+        preset.n_batch = 32
+        preset.n_ctx = 1024
+        return preset
+
 
 def parallel_function_calls(function_list: List[Tuple[Callable[..., Any], Tuple[Any, ...]]]) -> List[Any]:
     """