Skip to content

Commit fd333a2

Browse files
authored
Add the endpoints /api/tags and /api/chat
Add the endpoints /api/tags and /api/chat, and improve the model metadata response
1 parent 759e37b commit fd333a2

File tree

1 file changed

+69
-4
lines changed

1 file changed

+69
-4
lines changed

tools/server/server.cpp

Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3702,6 +3702,7 @@ int main(int argc, char ** argv) {
37023702
"/health",
37033703
"/models",
37043704
"/v1/models",
3705+
"/api/tags"
37053706
};
37063707

37073708
// If API key is not set, skip validation
@@ -3740,7 +3741,7 @@ int main(int argc, char ** argv) {
37403741
if (req.path == "/" || tmp.back() == "html") {
37413742
res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
37423743
res.status = 503;
3743-
} else if (req.path == "/models" || req.path == "/v1/models") {
3744+
} else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
37443745
// allow the models endpoint to be accessed during loading
37453746
return true;
37463747
} else {
@@ -4076,7 +4077,21 @@ int main(int argc, char ** argv) {
40764077
res_ok(res, {{ "success", true }});
40774078
};
40784079

4079-
const auto handle_api_show = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
4080+
const auto handle_api_show = [&ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
4081+
server_state current_state = state.load();
4082+
const auto* model = llama_get_model(ctx_server.ctx);
4083+
4084+
// Get basic model info
4085+
char arch_buf[64] = {0};
4086+
char param_size_buf[64] = {0};
4087+
llama_model_meta_val_str(model, "general.architecture", arch_buf, sizeof(arch_buf));
4088+
llama_model_meta_val_str(model, "general.parameter_count", param_size_buf, sizeof(param_size_buf));
4089+
4090+
json model_meta = nullptr;
4091+
if (current_state == SERVER_STATE_READY) {
4092+
model_meta = ctx_server.model_meta();
4093+
}
4094+
40804095
json data = {
40814096
{
40824097
"template", common_chat_templates_source(ctx_server.chat_templates.get()),
@@ -4086,6 +4101,19 @@ int main(int argc, char ** argv) {
40864101
{ "llama.context_length", ctx_server.slots.back().n_ctx, },
40874102
}
40884103
},
4104+
{"modelfile", ""}, // Specific to ollama and does not seem to be needed
4105+
{"parameters", ""}, // TODO: add parameters
4106+
{"template", common_chat_templates_source(ctx_server.chat_templates.get())},
4107+
{"details", {
4108+
{"parent_model", ""}, // TODO: add parent model if available
4109+
{"format", "gguf"},
4110+
{"family", arch_buf},
4111+
{"families", {arch_buf}},
4112+
{"parameter_size", param_size_buf},
4113+
{"quantization_level", ""} // TODO: add quantization level if available
4114+
}},
4115+
{"model_info", model_meta},
4116+
{"capabilities", {"completion"}} // TODO: add other capabilities if available
40894117
};
40904118

40914119
res_ok(res, data);
@@ -4409,8 +4437,43 @@ int main(int argc, char ** argv) {
44094437
if (current_state == SERVER_STATE_READY) {
44104438
model_meta = ctx_server.model_meta();
44114439
}
4412-
4440+
// Get file metadata
4441+
struct stat file_stat;
4442+
stat(params.model.path.c_str(), &file_stat);
4443+
4444+
// Convert modified time to ISO 8601
4445+
char modified_buf[64];
4446+
strftime(modified_buf, sizeof(modified_buf), "%Y-%m-%dT%H:%M:%S%z", localtime(&file_stat.st_mtime));
4447+
4448+
const auto* model = llama_get_model(ctx_server.ctx);
4449+
char arch_buf[64] = {0};
4450+
char param_size_buf[64] = {0};
4451+
llama_model_meta_val_str(model, "general.architecture", arch_buf, sizeof(arch_buf));
4452+
llama_model_meta_val_str(model, "general.parameter_count", param_size_buf, sizeof(param_size_buf));
4453+
44134454
json models = {
4455+
{"models", {
4456+
{
4457+
{"name", params.model_alias.empty() ? params.model.path : params.model_alias},
4458+
{"model", params.model_alias.empty() ? params.model.path : params.model_alias},
4459+
{"modified_at", modified_buf},
4460+
{"size", file_stat.st_size},
4461+
{"digest", ""}, // TODO: add digest
4462+
{"type", "model"},
4463+
{"description", ""},
4464+
{"tags", {arch_buf}},
4465+
{"capabilities", {"completion"}},
4466+
{"parameters", ""}, // TODO: add parameters
4467+
{"details", {
4468+
{"parent_model", ""}, // TODO: Add parent_model
4469+
{"format", "gguf"},
4470+
{"family", arch_buf},
4471+
{"families", {arch_buf}},
4472+
{"parameter_size", param_size_buf},
4473+
{"quantization_level", ""} // TODO: add quantization level if available
4474+
}}
4475+
}
4476+
}},
44144477
{"object", "list"},
44154478
{"data", {
44164479
{
@@ -4420,7 +4483,7 @@ int main(int argc, char ** argv) {
44204483
{"owned_by", "llamacpp"},
44214484
{"meta", model_meta},
44224485
},
4423-
}}
4486+
}}
44244487
};
44254488

44264489
res_ok(res, models);
@@ -4748,11 +4811,13 @@ int main(int argc, char ** argv) {
47484811
svr->Post("/api/show", handle_api_show);
47494812
svr->Get ("/models", handle_models); // public endpoint (no API key check)
47504813
svr->Get ("/v1/models", handle_models); // public endpoint (no API key check)
4814+
svr->Get ("/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
47514815
svr->Post("/completion", handle_completions); // legacy
47524816
svr->Post("/completions", handle_completions);
47534817
svr->Post("/v1/completions", handle_completions_oai);
47544818
svr->Post("/chat/completions", handle_chat_completions);
47554819
svr->Post("/v1/chat/completions", handle_chat_completions);
4820+
svr->Post("/api/chat", handle_chat_completions); // ollama specific endpoint
47564821
svr->Post("/infill", handle_infill);
47574822
svr->Post("/embedding", handle_embeddings); // legacy
47584823
svr->Post("/embeddings", handle_embeddings);

0 commit comments

Comments
 (0)