
Commit c9275ef

add back single GGUF format. add test

1 parent 4178d5c · commit c9275ef

2 files changed (+37, -37 lines)

engine/e2e-test/api/model/test_api_model.py

Lines changed: 26 additions & 37 deletions
```diff
@@ -11,7 +11,7 @@
 class TestApiModel:
     @pytest.fixture(autouse=True)
     def setup_and_teardown(self):
-        # Setup
+        # Setup
         success = start_server()
         if not success:
             raise Exception("Failed to start server")
@@ -20,42 +20,31 @@ def setup_and_teardown(self):
 
         # Teardown
         stop_server()
-
-    # Pull with direct url
+
+    # Pull with direct url
     @pytest.mark.asyncio
-    async def test_model_pull_with_direct_url_should_be_success(self):
-        run(
-            "Delete model",
-            [
-                "models",
-                "delete",
-                "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
-            ],
-        )
-
-        myobj = {
-            "model": "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf"
-        }
+    @pytest.mark.parametrize(
+        "request_model",
+        [
+            "https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF/blob/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf",
+            "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
+        ]
+    )
+    async def test_model_pull_with_direct_url_should_be_success(self, request_model):
+        model_id = "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
+        run("Delete model", ["models", "delete", model_id])
+
+        myobj = {"model": request_model}
         response = requests.post("http://localhost:3928/v1/models/pull", json=myobj)
         assert response.status_code == 200
         await wait_for_websocket_download_success_event(timeout=None)
         get_model_response = requests.get(
-            "http://127.0.0.1:3928/v1/models/afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
+            f"http://127.0.0.1:3928/v1/models/{model_id}"
         )
         assert get_model_response.status_code == 200
-        assert (
-            get_model_response.json()["model"]
-            == "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf"
-        )
-
-        run(
-            "Delete model",
-            [
-                "models",
-                "delete",
-                "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
-            ],
-        )
+        assert get_model_response.json()["model"] == model_id
+
+        run("Delete model", ["models", "delete", model_id])
 
     @pytest.mark.asyncio
     async def test_model_pull_with_direct_url_should_have_desired_name(self):
@@ -75,7 +64,7 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self):
             get_model_response.json()["name"]
             == "smol_llama_100m"
         )
-
+
         run(
             "Delete model",
             [
@@ -84,7 +73,7 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self):
                 "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:zephyr-smol_llama-100m-sft-full.q2_k.gguf",
             ],
         )
-
+
     @pytest.mark.asyncio
     async def test_models_start_stop_should_be_successful(self):
         print("Install engine")
@@ -99,12 +88,12 @@ async def test_models_start_stop_should_be_successful(self):
         response = requests.post("http://localhost:3928/v1/models/pull", json=json_body)
         assert response.status_code == 200, f"Failed to pull model: tinyllama:1b"
         await wait_for_websocket_download_success_event(timeout=None)
-
+
         # get API
         print("Get model")
         response = requests.get("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
-
+
         # list API
         print("List model")
         response = requests.get("http://localhost:3928/v1/models")
@@ -120,7 +109,7 @@ async def test_models_start_stop_should_be_successful(self):
         print("Stop model")
         response = requests.post("http://localhost:3928/v1/models/stop", json=json_body)
         assert response.status_code == 200, f"status_code: {response.status_code}"
-
+
         # update API
         print("Update model")
         body_json = {'model': 'tinyllama:1b'}
@@ -131,14 +120,14 @@ async def test_models_start_stop_should_be_successful(self):
         print("Delete model")
         response = requests.delete("http://localhost:3928/v1/models/tinyllama:1b")
         assert response.status_code == 200
-
+
     def test_models_sources_api(self):
         json_body = {"source": "https://huggingface.co/cortexso/tinyllama"}
         response = requests.post(
            "http://localhost:3928/v1/models/sources", json=json_body
         )
         assert response.status_code == 200, f"status_code: {response.status_code}"
-
+
         json_body = {"source": "https://huggingface.co/cortexso/tinyllama"}
         response = requests.delete(
             "http://localhost:3928/v1/models/sources", json=json_body
```

engine/services/model_service.cc

Lines changed: 11 additions & 0 deletions
```diff
@@ -809,6 +809,17 @@ cpp::result<DownloadTask, std::string> ModelService::PullModel(
     return DownloadModelFromCortexsoAsync(
         model_and_branch[0], model_and_branch[1], desired_model_id);
 
+  // single file GGUF format - author_id:model_id:GGUF_filename
+  if (model_and_branch.size() == 3) {
+    url_parser::Url url;
+    url.protocol = "https";
+    url.host = kHuggingFaceHost;
+    url.pathParams = {model_and_branch[0], model_and_branch[1], "resolve",
+                      "main", model_and_branch[2]};
+    return HandleDownloadUrlAsync(url.ToFullPath(), desired_model_id,
+                                  desired_model_name);
+  }
+
   return cpp::fail("Invalid model handle or not supported!");
 }
```
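The new branch turns a three-part handle into a HuggingFace resolve URL and hands it to the existing direct-download path. A hypothetical pure-Python mirror of that mapping, assuming kHuggingFaceHost is huggingface.co (the constant's value is not shown in this diff):

```python
def single_gguf_url(handle: str) -> str:
    """Mirror of the new C++ branch: author_id:model_id:GGUF_filename
    -> https://huggingface.co/<author>/<model>/resolve/main/<filename>."""
    author_id, model_id, gguf_filename = handle.split(":")
    return (
        f"https://huggingface.co/{author_id}/{model_id}"
        f"/resolve/main/{gguf_filename}"
    )

print(single_gguf_url(
    "afrideva:zephyr-smol_llama-100m-sft-full-GGUF:"
    "zephyr-smol_llama-100m-sft-full.q2_k.gguf"
))
# -> https://huggingface.co/afrideva/zephyr-smol_llama-100m-sft-full-GGUF
#    /resolve/main/zephyr-smol_llama-100m-sft-full.q2_k.gguf
```

Note that the test's direct-URL case uses a /blob/main/ URL while this branch builds /resolve/main/; presumably HandleDownloadUrlAsync normalizes blob URLs elsewhere.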
