Skip to content

Commit 0531744

Browse files
authored
server : passthrough the /models endpoint during loading (#13535)
* server : passthrough the /models endpoint during loading
* server : update readme + return json for "meta" field
1 parent 360a9c9 commit 0531744

File tree

2 files changed

+12
-3
lines changed

2 files changed

+12
-3
lines changed

tools/server/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1040,7 +1040,7 @@ To know the `id` of the adapter, use GET `/lora-adapters`
10401040

10411041
Returns information about the loaded model. See [OpenAI Models API documentation](https://platform.openai.com/docs/api-reference/models).
10421042

1043-
The returned list always has one single element.
1043+
The returned list always has one single element. The `meta` field can be `null` (for example, while the model is still loading).
10441044

10451045
By default, model `id` field is the path to model file, specified via `-m`. You can set a custom value for model `id` field via `--alias` argument. For example, `--alias gpt-4o-mini`.
10461046

tools/server/server.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3707,6 +3707,9 @@ int main(int argc, char ** argv) {
37073707
if (req.path == "/" || tmp.back() == "html") {
37083708
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
37093709
res.status = 503;
3710+
} else if (req.path == "/models" || req.path == "/v1/models") {
3711+
// allow the models endpoint to be accessed during loading
3712+
return true;
37103713
} else {
37113714
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
37123715
}
@@ -4365,7 +4368,13 @@ int main(int argc, char ** argv) {
43654368
res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
43664369
};
43674370

4368-
const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
4371+
const auto handle_models = [&params, &ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
4372+
server_state current_state = state.load();
4373+
json model_meta = nullptr;
4374+
if (current_state == SERVER_STATE_READY) {
4375+
model_meta = ctx_server.model_meta();
4376+
}
4377+
43694378
json models = {
43704379
{"object", "list"},
43714380
{"data", {
@@ -4374,7 +4383,7 @@ int main(int argc, char ** argv) {
43744383
{"object", "model"},
43754384
{"created", std::time(0)},
43764385
{"owned_by", "llamacpp"},
4377-
{"meta", ctx_server.model_meta()}
4386+
{"meta", model_meta},
43784387
},
43794388
}}
43804389
};

0 commit comments

Comments
 (0)