
Commit 9bb1ae6

add test for slots endpoint
1 parent db97c8b · commit 9bb1ae6

4 files changed: +17 −2 lines changed


examples/server/server.cpp

Lines changed: 1 addition & 0 deletions

@@ -2184,6 +2184,7 @@ struct server_context {
 
                 auto res = std::make_unique<server_task_result_metrics>();
                 res->id                 = task.id;
+                res->slots_data         = slots_data;
                 res->n_idle_slots       = n_idle_slots;
                 res->n_processing_slots = n_processing_slots;
                 res->n_tasks_deferred   = queue_tasks.queue_tasks_deferred.size();
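The new assignment copies the per-slot state (slots_data) into the metrics task result, which is what backs the /slots endpoint exercised by the new test. As a minimal sketch of querying that endpoint, assuming a llama-server instance already running locally with --slots enabled (host and port below are placeholders, not taken from this commit):

import json
import urllib.request

# Query the slots endpoint; the response is a JSON array with one entry per slot.
with urllib.request.urlopen("http://localhost:8080/slots") as resp:
    slots = json.load(resp)

# The new test only relies on the n_ctx field, so only that is assumed here.
for slot in slots:
    print(slot["n_ctx"])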

examples/server/tests/unit/test_basic.py

Lines changed: 11 additions & 0 deletions

@@ -33,6 +33,17 @@ def test_server_models():
     assert len(res.body["data"]) == 1
     assert res.body["data"][0]["id"] == server.model_alias
 
+
+def test_server_slots():
+    global server
+    server.server_slots = True
+    server.start()
+    res = server.make_request("GET", "/slots")
+    assert res.status_code == 200
+    assert len(res.body) == server.n_slots
+    assert res.body[0]["n_ctx"] > 0
+
+
 def test_load_split_model():
     global server
     server.model_hf_repo = "ggml-org/models"
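For reference, a response body shape consistent with the three assertions above; this is a sketch, not the server's verbatim output, and every field other than n_ctx is an assumption:

# Hypothetical /slots body for a single-slot server; fields other than n_ctx
# are illustrative assumptions.
example_body = [
    {"id": 0, "n_ctx": 4096, "is_processing": False},
]
assert len(example_body) == 1        # one entry per slot (n_slots == 1 here)
assert example_body[0]["n_ctx"] > 0  # each slot reports its context size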

examples/server/tests/unit/test_chat_completion.py

Lines changed: 2 additions & 2 deletions

@@ -30,7 +30,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
         ],
     })
     assert res.status_code == 200
-    assert "cmpl" in res.body["id"]
+    assert "cmpl" in res.body["id"]  # make sure the completion id has the expected format
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
@@ -66,7 +66,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
         assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
-        assert last_cmpl_id == data["id"]
+        assert last_cmpl_id == data["id"]  # make sure the completion id is the same for all events in the stream
         if choice["finish_reason"] in ["stop", "length"]:
             assert data["usage"]["prompt_tokens"] == n_prompt
             assert data["usage"]["completion_tokens"] == n_predicted
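Both new comments point at the same convention: OpenAI-compatible servers return completion ids shaped like chatcmpl-<suffix>, so checking for the cmpl substring is a loose format check that also tolerates plain cmpl- prefixes. An illustration only, since the id generator itself is not part of this commit:

# Illustrative id in the OpenAI style; the exact suffix format produced by
# llama-server is not shown in this commit.
example_id = "chatcmpl-Abc123XYZ"
assert "cmpl" in example_id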

examples/server/tests/utils.py

Lines changed: 3 additions & 0 deletions

@@ -64,6 +64,7 @@ class ServerProcess:
     server_embeddings: bool | None = False
     server_reranking: bool | None = False
     server_metrics: bool | None = False
+    server_slots: bool | None = False
     draft: int | None = None
     api_key: str | None = None
     response_format: str | None = None
@@ -129,6 +130,8 @@ def start(self, timeout_seconds: int = 10) -> None:
             server_args.append("--reranking")
         if self.server_metrics:
             server_args.append("--metrics")
+        if self.server_slots:
+            server_args.append("--slots")
         if self.model_alias:
             server_args.extend(["--alias", self.model_alias])
         if self.n_ctx:
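Putting the pieces together, a test enables the new flag before starting the server; a usage sketch, where ServerPreset.tinyllama2 is assumed to be the fixture preset used elsewhere in this suite:

# Usage sketch: server_slots=True makes start() append --slots to server_args,
# enabling the GET /slots endpoint queried below.
server = ServerPreset.tinyllama2()  # assumed preset name
server.server_slots = True
server.start()
res = server.make_request("GET", "/slots")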
