File tree Expand file tree Collapse file tree 2 files changed +5
-1
lines changed Expand file tree Collapse file tree 2 files changed +5
-1
lines changed Original file line number Diff line number Diff line change @@ -801,7 +801,8 @@ struct server_task_result_metrics : server_task_result {
801801 uint64_t n_decode_total = 0 ;
802802 uint64_t n_busy_slots_total = 0 ;
803803
804- // TODO: get rid of this json object and use to_json() instead
804+ // while we could also use std::vector<server_slot>, that would require copying the slot object, which can be quite messy;
805+ // therefore, we use json to temporarily store the slot.to_json() result
805806 json slots_data = json::array();
806807
807808 virtual json to_json () override {
@@ -3326,9 +3327,11 @@ int main(int argc, char ** argv) {
33263327 };
33273328
33283329 const auto handle_props = [&ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
3330+ // this endpoint is publicly available; please only return what is safe to expose
33293331 json data = {
33303332 { " default_generation_settings" , ctx_server.default_generation_settings_for_props },
33313333 { " total_slots" , ctx_server.params_base .n_parallel },
3334+ { " model_path" , ctx_server.params_base .model },
33323335 { " chat_template" , llama_get_chat_template (ctx_server.model ) },
33333336 };
33343337
Original file line number Diff line number Diff line change @@ -22,6 +22,7 @@ def test_server_props():
2222 server .start ()
2323 res = server .make_request ("GET" , "/props" )
2424 assert res .status_code == 200
25+ assert ".gguf" in res .body ["model_path" ]
2526 assert res .body ["total_slots" ] == server .n_slots
2627 default_val = res .body ["default_generation_settings" ]
2728 assert server .n_ctx is not None and server .n_slots is not None
You can’t perform that action at this time.
0 commit comments