add "model_path" to /props

ngxson · ngxson · commit 1949f68f4e3c · 2024-12-07T20:05:04.000+01:00
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
@@ -801,7 +801,8 @@ struct server_task_result_metrics : server_task_result {
     uint64_t n_decode_total     = 0;
     uint64_t n_busy_slots_total = 0;
 
-    // TODO: get rid of this json object and use to_json() instead
+    // we could also use std::vector<server_slot>, but that requires copying the slot objects, which can be quite messy;
+    // therefore, we use json to temporarily store the slot.to_json() result
     json slots_data = json::array();
 
     virtual json to_json() override {
@@ -3326,9 +3327,11 @@ int main(int argc, char ** argv) {
     };
 
     const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+        // this endpoint is publicly available; only return data that is safe to expose
         json data = {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
+            { "model_path",                  ctx_server.params_base.model },
             { "chat_template",               llama_get_chat_template(ctx_server.model) },
         };
 
diff --git a/examples/server/tests/unit/test_basic.py b/examples/server/tests/unit/test_basic.py
@@ -22,6 +22,7 @@ def test_server_props():
     server.start()
     res = server.make_request("GET", "/props")
     assert res.status_code == 200
+    assert ".gguf" in res.body["model_path"]
     assert res.body["total_slots"] == server.n_slots
     default_val = res.body["default_generation_settings"]
     assert server.n_ctx is not None and server.n_slots is not None