
Commit c9275ef

add back single GGUF format. add test

1 parent 4178d5c · commit c9275ef

2 files changed (+37, -37 lines)

engine/e2e-test/api/model/test_api_model.py

Lines changed: 26 additions & 37 deletions
```diff
@@ -11,7 +11,7 @@
 class TestApiModel:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
-        # Setup
+        # Setup
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
@@ -20,42 +20,31 @@ def setup_and_teardown(self):
 
         # Teardown
         stop_server()
-
-    # Pull with direct url
+
+    # Pull with direct url
     @pytest.mark.asyncio
-    async def test_model_pull_with_direct_url_should_be_success(self):
-        run(
-            "Delete model",
-            [
-                "models",
-                "delete",
-                "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
-            ],
-        )
-
-        myobj = {
-            "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf"
-        }
+    @pytest.mark.parametrize(
+        "request_model",
+        [
+            "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf",
+            "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
+        ]
+    )
+    async def test_model_pull_with_direct_url_should_be_success(self, request_model):
+        model_id = "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
+        run("Delete model", ["models", "delete", model_id])
+
+        myobj = {"model": request_model}
         response = requests.post("http://localhost:3928/v1/models/pull", json=myobj)
         assert response.status_code == 200
         await wait_for_websocket_download_success_event(timeout=None)
         get_model_response = requests.get(
-            "http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
+            f"http://127.0.0.1:3928/v1/models/{model_id}"
         )
         assert get_model_response.status_code == 200
-        assert (
-            get_model_response.json()["model"]
-            == "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
-        )
-
-        run(
-            "Delete model",
-            [
-                "models",
-                "delete",
-                "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
-            ],
-        )
+        assert get_model_response.json()["model"] == model_id
+
+        run("Delete model", ["models", "delete", model_id])
 
     @pytest.mark.asyncio
     async def test_model_pull_with_direct_url_should_have_desired_name(self):
@@ -75,7 +64,7 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self):
             get_model_response.json()["name"]
             == "smol_llama_100m"
         )
-
+
         run(
             "Delete model",
             [
@@ -84,7 +73,7 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self):
                 "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
             ],
         )
-
+
     @pytest.mark.asyncio
     async def test_models_start_stop_should_be_successful(self):
         print("Install engine")
@@ -99,12 +88,12 @@ async def test_models_start_stop_should_be_successful(self):
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
         assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
-
+
         # get API
         print("Get model")
         response = requests.get("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
-
+
         # list API
         print("List model")
         response = requests.get("http://localhost:3928/v1/models")
@@ -120,7 +109,7 @@ async def test_models_start_stop_should_be_successful(self):
         print("Stop model")
         response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
         assert response.status_code == 200, f"status_code: {response.status_code}"
-
+
         # update API
         print("Update model")
         body_json = {'model': 'tinyllama:1b'}
@@ -131,14 +120,14 @@ async def test_models_start_stop_should_be_successful(self):
         print("Delete model")
         response = requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
-
+
     def test_models_sources_api(self):
         json_body = {"source": "https://huggingface.co/cortexso/tinyllama"}
         response = requests.post(
            "http://localhost:3928/v1/models/sources", json=json_body
         )
         assert response.status_code == 200, f"status_code: {response.status_code}"
-
+
         json_body = {"source": "https://huggingface.co/cortexso/tinyllama"}
         response = requests.delete(
             "http://localhost:3928/v1/models/sources", json=json_body
```

engine/services/model_service.cc

Lines changed: 11 additions & 0 deletions
```diff
@@ -809,6 +809,17 @@ cpp::result<DownloadTask, std::string> ModelService::PullModel(
     return DownloadModelFromCortexsoAsync(
         model_and_branch[0], model_and_branch[1], desired_model_id);
 
+  // single file GGUF format - author_id:model_id:GGUF_filename
+  if (model_and_branch.size() == 3) {
+    url_parser::Url url;
+    url.protocol = "https";
+    url.host = kHuggingFaceHost;
+    url.pathParams = {model_and_branch[0], model_and_branch[1], "resolve",
+                      "main", model_and_branch[2]};
+    return HandleDownloadUrlAsync(url.ToFullPath(), desired_model_id,
+                                  desired_model_name);
+  }
+
   return cpp::fail("Invalid model handle or not supported!");
 }
```
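The new branch turns a three-part handle into a HuggingFace resolve URL and hands it to the existing direct-download path. A hypothetical pure-Python mirror of that mapping, assuming kHuggingFaceHost is huggingface.co (the constant's value is not shown in this diff):

```python
def single_gguf_url(handle: str) -> str:
    """Mirror of the new C++ branch: author_id:model_id:GGUF_filename
    -> https://huggingface.co/<author>/<model>/resolve/main/<filename>."""
    author_id, model_id, gguf_filename = handle.split(":")
    return (
        f"https://huggingface.co/{author_id}/{model_id}"
        f"/resolve/main/{gguf_filename}"
    )

print(single_gguf_url(
    "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:"
    "zephyr-smol_llama-100m-sft-full.q2_k.gguf"
))
# -> https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF
#    /resolve/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf
```

Note that the test's direct-URL case uses a /blob/main/ URL while this branch builds /resolve/main/; presumably HandleDownloadUrlAsync normalizes blob URLs elsewhere.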
