@@ -12,6 +12,8 @@
 // increase max payload length to allow use of larger context size
 #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
 #include "httplib.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"

 // auto generated files (update with ./deps.sh)
@@ -859,7 +861,7 @@ struct server_context {
         slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);

         // process "json_schema" and "grammar"
-        if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
+        if (data.contains("json_schema") && !data.at("json_schema").is_null() && data.contains("grammar") && !data.at("grammar").is_null()) {
             send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
             return false;
         } else if (data.contains("json_schema") && !data.contains("grammar")) {
@@ -1512,7 +1514,7 @@ struct server_context {
         // add subtasks
         for (int i = 0; i < prompt_count; i++) {
             json subtask_data = multiprompt_task.data;
-            subtask_data["prompt"] = subtask_data["prompt"][i];
+            subtask_data["prompt"] = subtask_data.at("prompt")[i];

             // subtasks inherit everything else (infill mode, embedding mode, etc.)
             request_completion(subtask_ids[i], id_multi, subtask_data, multiprompt_task.infill, multiprompt_task.embedding);
@@ -1532,7 +1534,7 @@ struct server_context {
         }

         if (task.data.contains("system_prompt")) {
-            system_prompt_set(task.data["system_prompt"]);
+            system_prompt_set(task.data.at("system_prompt"));

             for (server_slot & slot : slots) {
                 slot.n_past = 0;
@@ -1644,7 +1646,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_SAVE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1654,8 +1656,8 @@ struct server_context {
                     const size_t token_count = slot->cache_tokens.size();
                     const int64_t t_start = ggml_time_us();

-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");

                     const size_t nwrite = llama_state_seq_save_file(ctx, filepath.c_str(), slot->id + 1, slot->cache_tokens.data(), token_count);
@@ -1679,7 +1681,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_RESTORE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1688,8 +1690,8 @@ struct server_context {

                     const int64_t t_start = ggml_time_us();

-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");

                     slot->cache_tokens.resize(slot->n_ctx);
                     size_t token_count = 0;
@@ -1721,7 +1723,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_ERASE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -3136,8 +3138,8 @@ int main(int argc, char ** argv) {
         server_task_result result = ctx_server.queue_results.recv(task.id);
         ctx_server.queue_results.remove_waiting_task_id(task.id);

-        const int n_idle_slots = result.data["idle"];
-        const int n_processing_slots = result.data["processing"];
+        const int n_idle_slots = result.data.at("idle");
+        const int n_processing_slots = result.data.at("processing");

         json health = {
             {"status", "ok"},
@@ -3147,7 +3149,7 @@ int main(int argc, char ** argv) {

         res.status = 200; // HTTP OK
         if (sparams.slots_endpoint && req.has_param("include_slots")) {
-            health["slots"] = result.data["slots"];
+            health["slots"] = result.data.at("slots");
         }

         if (n_idle_slots == 0) {
@@ -3191,7 +3193,7 @@ int main(int argc, char ** argv) {
         server_task_result result = ctx_server.queue_results.recv(task.id);
         ctx_server.queue_results.remove_waiting_task_id(task.id);

-        res.set_content(result.data["slots"].dump(), "application/json");
+        res.set_content(result.data.at("slots").dump(), "application/json");
         res.status = 200; // HTTP OK
     };

@@ -3218,32 +3220,32 @@ int main(int argc, char ** argv) {

         json data = result.data;

-        const uint64_t n_prompt_tokens_processed = data["n_prompt_tokens_processed"];
-        const uint64_t t_prompt_processing = data["t_prompt_processing"];
+        const uint64_t n_prompt_tokens_processed = data.at("n_prompt_tokens_processed");
+        const uint64_t t_prompt_processing = data.at("t_prompt_processing");

-        const uint64_t n_tokens_predicted = data["n_tokens_predicted"];
-        const uint64_t t_tokens_generation = data["t_tokens_generation"];
+        const uint64_t n_tokens_predicted = data.at("n_tokens_predicted");
+        const uint64_t t_tokens_generation = data.at("t_tokens_generation");

-        const int32_t kv_cache_used_cells = data["kv_cache_used_cells"];
+        const int32_t kv_cache_used_cells = data.at("kv_cache_used_cells");

         // metrics definition: https://prometheus.io/docs/practices/naming/#metric-names
         json all_metrics_def = json {
             {"counter", {{
                     {"name", "prompt_tokens_total"},
                     {"help", "Number of prompt tokens processed."},
-                    {"value", (uint64_t) data["n_prompt_tokens_processed_total"]}
+                    {"value", (uint64_t) data.at("n_prompt_tokens_processed_total")}
             }, {
                     {"name", "prompt_seconds_total"},
                     {"help", "Prompt process time"},
-                    {"value", (uint64_t) data["t_prompt_processing_total"] / 1.e3}
+                    {"value", (uint64_t) data.at("t_prompt_processing_total") / 1.e3}
             }, {
                     {"name", "tokens_predicted_total"},
                     {"help", "Number of generation tokens processed."},
-                    {"value", (uint64_t) data["n_tokens_predicted_total"]}
+                    {"value", (uint64_t) data.at("n_tokens_predicted_total")}
             }, {
                     {"name", "tokens_predicted_seconds_total"},
                     {"help", "Predict process time"},
-                    {"value", (uint64_t) data["t_tokens_generation_total"] / 1.e3}
+                    {"value", (uint64_t) data.at("t_tokens_generation_total") / 1.e3}
             }}},
             {"gauge", {{
                     {"name", "prompt_tokens_seconds"},
@@ -3260,15 +3262,15 @@ int main(int argc, char ** argv) {
             },{
                     {"name", "kv_cache_tokens"},
                     {"help", "KV-cache tokens."},
-                    {"value", (uint64_t) data["kv_cache_tokens_count"]}
+                    {"value", (uint64_t) data.at("kv_cache_tokens_count")}
             },{
                     {"name", "requests_processing"},
                     {"help", "Number of request processing."},
-                    {"value", (uint64_t) data["processing"]}
+                    {"value", (uint64_t) data.at("processing")}
             },{
                     {"name", "requests_deferred"},
                     {"help", "Number of request deferred."},
-                    {"value", (uint64_t) data["deferred"]}
+                    {"value", (uint64_t) data.at("deferred")}
             }}}
         };

@@ -3279,8 +3281,8 @@ int main(int argc, char ** argv) {
             const auto & metrics_def = el.value();

             for (const auto & metric_def : metrics_def) {
-                const std::string name = metric_def["name"];
-                const std::string help = metric_def["help"];
+                const std::string name = metric_def.at("name");
+                const std::string help = metric_def.at("help");

                 auto value = json_value(metric_def, "value", 0.);
                 prometheus << "# HELP llamacpp:" << name << " " << help << "\n"
@@ -3289,7 +3291,7 @@ int main(int argc, char ** argv) {
             }
         }

-        const int64_t t_start = data["t_start"];
+        const int64_t t_start = data.at("t_start");
         res.set_header("Process-Start-Time-Unix", std::to_string(t_start));

         res.set_content(prometheus.str(), "text/plain; version=0.0.4");
@@ -3298,7 +3300,7 @@ int main(int argc, char ** argv) {

     const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3328,7 +3330,7 @@ int main(int argc, char ** argv) {

     const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3648,7 +3650,7 @@ int main(int argc, char ** argv) {
         std::vector<llama_token> tokens;
         if (body.count("content") != 0) {
             const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body["content"], add_special);
+            tokens = ctx_server.tokenize(body.at("content"), add_special);
         }
         const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json; charset=utf-8");
@@ -3660,7 +3662,7 @@ int main(int argc, char ** argv) {

         std::string content;
         if (body.count("tokens") != 0) {
-            const std::vector<llama_token> tokens = body["tokens"];
+            const std::vector<llama_token> tokens = body.at("tokens");
             content = tokens_to_str(ctx_server.ctx, tokens.cbegin(), tokens.cend());
         }

@@ -3683,10 +3685,10 @@ int main(int argc, char ** argv) {
         json prompt;
         if (body.count("input") != 0) {
             is_openai = true;
-            prompt = body["input"];
+            prompt = body.at("input");
         } else if (body.count("content") != 0) {
             // with "content", we only support single prompt
-            prompt = std::vector<std::string>{body["content"]};
+            prompt = std::vector<std::string>{body.at("content")};
         } else {
             res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3705,7 +3707,7 @@ int main(int argc, char ** argv) {
             if (!result.error) {
                 if (result.data.count("results")) {
                     // result for multi-task
-                    responses = result.data["results"];
+                    responses = result.data.at("results");
                 } else {
                     // result for single task
                     responses = std::vector<json>{result.data};
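
A note on the pattern this patch applies throughout: with nlohmann::json, `operator[]` on a `const` object with a missing key is undefined behavior (it trips the library's internal `JSON_ASSERT`, which the new `#define JSON_ASSERT GGML_ASSERT` routes through GGML's assertion handler), while `.at()` throws `nlohmann::json::out_of_range`, which the server's existing exception handling can turn into a proper error response. A minimal standalone sketch of the difference (not part of the patch; assumes nlohmann/json is installed as `<nlohmann/json.hpp>`):

```cpp
#include <cstdio>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    const json data = {{"id_slot", 1}};

    // const operator[] on a missing key would be undefined behavior
    // (JSON_ASSERT fires in debug builds); .at() throws instead.
    try {
        const int id_slot = data.at("id_slot");   // ok: reads 1
        const int missing = data.at("filename");  // throws json::out_of_range
        std::printf("%d %d\n", id_slot, missing); // not reached
    } catch (const json::out_of_range & e) {
        std::printf("bad request: %s\n", e.what()); // e.g. "key 'filename' not found"
    }
    return 0;
}
```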