@@ -3702,6 +3702,7 @@ int main(int argc, char ** argv) {
             "/health",
             "/models",
             "/v1/models",
+            "/api/tags"
         };
 
         // If API key is not set, skip validation
@@ -3740,7 +3741,7 @@ int main(int argc, char ** argv) {
             if (req.path == "/" || tmp.back() == "html") {
                 res.set_content(reinterpret_cast<const char *>(loading_html), loading_html_len, "text/html; charset=utf-8");
                 res.status = 503;
-            } else if (req.path == "/models" || req.path == "/v1/models") {
+            } else if (req.path == "/models" || req.path == "/v1/models" || req.path == "/api/tags") {
                 // allow the models endpoint to be accessed during loading
                 return true;
             } else {
@@ -4076,7 +4077,21 @@ int main(int argc, char ** argv) {
         res_ok(res, {{ "success", true }});
     };
 
-    const auto handle_api_show = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+    const auto handle_api_show = [&ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
+        server_state current_state = state.load();
+        const auto * model = llama_get_model(ctx_server.ctx);
+
+        // Get basic model info
+        char arch_buf[64] = {0};
+        char param_size_buf[64] = {0};
+        llama_model_meta_val_str(model, "general.architecture", arch_buf, sizeof(arch_buf));
+        llama_model_meta_val_str(model, "general.parameter_count", param_size_buf, sizeof(param_size_buf));
+
+        json model_meta = nullptr;
+        if (current_state == SERVER_STATE_READY) {
+            model_meta = ctx_server.model_meta();
+        }
+
         json data = {
             {
                 "template", common_chat_templates_source(ctx_server.chat_templates.get()),
@@ -4086,6 +4101,19 @@ int main(int argc, char ** argv) {
                     { "llama.context_length", ctx_server.slots.back().n_ctx, },
                 }
             },
+            {"modelfile", ""},  // specific to ollama and does not seem to be needed
+            {"parameters", ""}, // TODO: add parameters
+            {"template", common_chat_templates_source(ctx_server.chat_templates.get())},
+            {"details", {
+                {"parent_model", ""}, // TODO: add parent model if available
+                {"format", "gguf"},
+                {"family", arch_buf},
+                {"families", {arch_buf}},
+                {"parameter_size", param_size_buf},
+                {"quantization_level", ""} // TODO: add quantization level if available
+            }},
+            {"model_info", model_meta},
+            {"capabilities", {"completion"}} // TODO: add other capabilities if available
         };
 
         res_ok(res, data);
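// Aside (not part of the patch): llama_model_meta_val_str() returns a negative
// value when a key is missing, which the reads above ignore; a minimal sketch
// of a guarded lookup, with get_meta_str as a hypothetical helper name:
#include <string>
#include "llama.h"

static std::string get_meta_str(const llama_model * model, const char * key, const char * fallback) {
    char buf[128] = {0};
    if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) < 0) {
        return fallback; // key absent from the GGUF metadata
    }
    return buf;
}
// usage: get_meta_str(model, "general.architecture", "unknown")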
@@ -4409,8 +4437,43 @@ int main(int argc, char ** argv) {
         if (current_state == SERVER_STATE_READY) {
             model_meta = ctx_server.model_meta();
         }
-
+        // Get file metadata; zero-initialized so a failed stat() yields empty
+        // values instead of uninitialized memory
+        struct stat file_stat = {};
+        stat(params.model.path.c_str(), &file_stat);
+
+        // Convert the modification time to ISO 8601
+        char modified_buf[64];
+        strftime(modified_buf, sizeof(modified_buf), "%Y-%m-%dT%H:%M:%S%z", localtime(&file_stat.st_mtime));
+
+        const auto * model = llama_get_model(ctx_server.ctx);
+        char arch_buf[64] = {0};
+        char param_size_buf[64] = {0};
+        llama_model_meta_val_str(model, "general.architecture", arch_buf, sizeof(arch_buf));
+        llama_model_meta_val_str(model, "general.parameter_count", param_size_buf, sizeof(param_size_buf));
+
         json models = {
+            {"models", {
+                {
+                    {"name", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"model", params.model_alias.empty() ? params.model.path : params.model_alias},
+                    {"modified_at", modified_buf},
+                    {"size", file_stat.st_size},
+                    {"digest", ""}, // TODO: add digest
+                    {"type", "model"},
+                    {"description", ""},
+                    {"tags", {arch_buf}},
+                    {"capabilities", {"completion"}},
+                    {"parameters", ""}, // TODO: add parameters
+                    {"details", {
+                        {"parent_model", ""}, // TODO: add parent_model
+                        {"format", "gguf"},
+                        {"family", arch_buf},
+                        {"families", {arch_buf}},
+                        {"parameter_size", param_size_buf},
+                        {"quantization_level", ""} // TODO: add quantization level if available
+                    }}
+                }
+            }},
             {"object", "list"},
             {"data", {
                 {
@@ -4420,7 +4483,7 @@ int main(int argc, char ** argv) {
                     {"owned_by", "llamacpp"},
                     {"meta", model_meta},
                 },
-            }}
+            }}
         };
 
         res_ok(res, models);
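// Aside (not part of the patch): the stat + strftime sequence above, sketched
// with explicit error handling and the reentrant localtime_r (POSIX);
// model_mtime_iso8601 is a hypothetical helper name.
#include <string>
#include <ctime>
#include <sys/stat.h>

static std::string model_mtime_iso8601(const std::string & path) {
    struct stat st = {};
    if (stat(path.c_str(), &st) != 0) {
        return ""; // file missing or unreadable; caller decides what to emit
    }
    struct tm tm_buf = {};
    localtime_r(&st.st_mtime, &tm_buf); // thread-safe, unlike localtime()
    char buf[64];
    strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%S%z", &tm_buf);
    return buf;
}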
@@ -4748,11 +4811,13 @@ int main(int argc, char ** argv) {
     svr->Post("/api/show",           handle_api_show);
     svr->Get ("/models",             handle_models); // public endpoint (no API key check)
     svr->Get ("/v1/models",          handle_models); // public endpoint (no API key check)
+    svr->Get ("/api/tags",           handle_models); // ollama specific endpoint. public endpoint (no API key check)
     svr->Post("/completion",         handle_completions); // legacy
     svr->Post("/completions",        handle_completions);
     svr->Post("/v1/completions",     handle_completions_oai);
     svr->Post("/chat/completions",   handle_chat_completions);
     svr->Post("/v1/chat/completions", handle_chat_completions);
+    svr->Post("/api/chat",           handle_chat_completions); // ollama specific endpoint
     svr->Post("/infill",             handle_infill);
     svr->Post("/embedding",          handle_embeddings); // legacy
     svr->Post("/embeddings",         handle_embeddings);
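// Aside (not part of the patch): a quick smoke test of the new ollama-style
// routes via the cpp-httplib client the server already bundles; host and port
// are assumptions for illustration.
#include <cstdio>
#include "httplib.h"

int main() {
    httplib::Client cli("localhost", 8080);
    if (auto res = cli.Get("/api/tags")) { // served by handle_models, no API key required
        printf("/api/tags -> %d\n%s\n", res->status, res->body.c_str());
    }
    return 0;
}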