@@ -334,24 +334,24 @@ struct server_task {
         if (data.contains("json_schema") && !data.contains("grammar")) {
             try {
                 auto schema = json_value(data, "json_schema", json::object());
-                LOG_DBG("JSON schema: %s\n", schema.dump(2).c_str());
+                SRV_DBG("JSON schema: %s\n", schema.dump(2).c_str());
                 params.sampling.grammar = json_schema_to_grammar(schema);
-                LOG_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
+                SRV_DBG("Converted grammar: %s\n", params.sampling.grammar.c_str());
             } catch (const std::exception & e) {
                 throw std::runtime_error(std::string("\"json_schema\": ") + e.what());
             }
         } else {
             params.sampling.grammar = json_value(data, "grammar", defaults.sampling.grammar);
-            LOG_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
+            SRV_DBG("Grammar: %s\n", params.sampling.grammar.c_str());
             params.sampling.grammar_lazy = json_value(data, "grammar_lazy", defaults.sampling.grammar_lazy);
-            LOG_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
+            SRV_DBG("Grammar lazy: %s\n", params.sampling.grammar_lazy ? "true" : "false");
         }
 
         {
             auto it = data.find("chat_format");
             if (it != data.end()) {
                 params.oaicompat_chat_format = static_cast<common_chat_format>(it->get<int>());
-                LOG_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str());
+                SRV_INF("Chat format: %s\n", common_chat_format_name(params.oaicompat_chat_format).c_str());
             } else {
                 params.oaicompat_chat_format = defaults.oaicompat_chat_format;
             }
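
Note on the renamed macros: this commit moves the server code from the generic LOG_DBG/LOG_INF/LOG_WRN logging macros to the server-scoped SRV_DBG/SRV_INF/SRV_WRN variants. Their definitions are not part of this diff; as a rough sketch of the idea (the prefix format and macro bodies below are assumptions for illustration, not the actual definitions), they can be read as thin wrappers that tag each message with the server context:

// Assumed shape of the server-scoped logging macros (illustrative only;
// the real definitions live elsewhere in the server sources and may differ).
#define SRV_INF(fmt, ...) LOG_INF("srv  %s: " fmt, __func__, __VA_ARGS__)
#define SRV_WRN(fmt, ...) LOG_WRN("srv  %s: " fmt, __func__, __VA_ARGS__)
#define SRV_DBG(fmt, ...) LOG_DBG("srv  %s: " fmt, __func__, __VA_ARGS__)
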
@@ -367,12 +367,12 @@ struct server_task {
 
                     auto ids = common_tokenize(vocab, trigger.word, /* add_special= */ false, /* parse_special= */ true);
                     if (ids.size() == 1) {
-                        LOG_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
+                        SRV_DBG("Grammar trigger token: %d (`%s`)\n", ids[0], trigger.word.c_str());
                         params.sampling.grammar_trigger_tokens.push_back(ids[0]);
                         params.sampling.preserved_tokens.insert(ids[0]);
                         continue;
                     }
-                    LOG_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
+                    SRV_DBG("Grammar trigger word: `%s`\n", trigger.word.c_str());
                     params.sampling.grammar_trigger_words.push_back(trigger);
                 }
             }
@@ -381,11 +381,11 @@ struct server_task {
                 for (const auto & t : *preserved_tokens) {
                     auto ids = common_tokenize(vocab, t.get<std::string>(), /* add_special= */ false, /* parse_special= */ true);
                     if (ids.size() == 1) {
-                        LOG_DBG("Preserved token: %d\n", ids[0]);
+                        SRV_DBG("Preserved token: %d\n", ids[0]);
                         params.sampling.preserved_tokens.insert(ids[0]);
                     } else {
                         // This may happen when using a tool call style meant for a model with special tokens to preserve on a model without said tokens.
-                        LOG_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
+                        SRV_WRN("Not preserved because more than 1 token (wrong chat template override?): %s\n", t.get<std::string>().c_str());
                     }
                 }
             }
@@ -717,7 +717,7 @@ struct server_task_result_cmpl_final : server_task_result {
         std::string finish_reason = "length";
         common_chat_msg msg;
         if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
-            LOG_DBG("Parsing chat message: %s\n", content.c_str());
+            SRV_DBG("Parsing chat message: %s\n", content.c_str());
             msg = common_chat_parse(content, oaicompat_chat_format);
             finish_reason = msg.tool_calls.empty() ? "stop" : "tool_calls";
         } else {
@@ -1885,7 +1885,7 @@ struct server_context {
         }
 
         if (params_base.chat_template.empty() && !validate_builtin_chat_template(params.use_jinja)) {
-            LOG_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
+            SRV_WRN("%s: The chat template that comes with this model is not yet supported, falling back to chatml. This may cause the model to output suboptimal responses\n", __func__);
             chat_templates = common_chat_templates_from_model(model, "chatml");
         } else {
             chat_templates = common_chat_templates_from_model(model, params_base.chat_template);
@@ -3355,10 +3355,10 @@ static void log_server_request(const httplib::Request & req, const httplib::Resp
 
     // reminder: this function is not covered by httplib's exception handler; if someone does more complicated stuff, think about wrapping it in try-catch
 
-    LOG_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
+    SRV_INF("request: %s %s %s %d\n", req.method.c_str(), req.path.c_str(), req.remote_addr.c_str(), res.status);
 
-    LOG_DBG("request: %s\n", req.body.c_str());
-    LOG_DBG("response: %s\n", res.body.c_str());
+    SRV_DBG("request: %s\n", req.body.c_str());
+    SRV_DBG("response: %s\n", res.body.c_str());
 }
 
 std::function<void(int)> shutdown_handler;
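
For context on where log_server_request sits (not shown in this diff): a function with this signature is typically registered as the cpp-httplib logger, which is invoked after each request/response pair and outside httplib's exception handling, as the reminder comment above notes. A minimal sketch, assuming the server object is a httplib::Server named svr:

// Sketch: installing the per-request logger with cpp-httplib.
// set_logger() is called once during server setup; httplib then invokes
// log_server_request after every handled request.
httplib::Server svr;
svr.set_logger(log_server_request);
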
@@ -3860,7 +3860,9 @@ int main(int argc, char ** argv) {
 
         try {
             const auto & prompt = data.at("prompt");
-            LOG_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
+            // TODO: this log can become very long, put it behind a flag or think about a more compact format
+            // SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
+
             std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
             tasks.reserve(tokenized_prompts.size());
             for (size_t i = 0; i < tokenized_prompts.size(); i++) {
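
On the TODO that replaces the old prompt log: one possible direction (purely a sketch; the helper name, the 512-character cap, and its reliance on the server's json alias and the SRV_DBG macro are assumptions, not part of this commit) is to log only a bounded prefix of the prompt instead of dropping the log entirely:

// Hypothetical helper: log at most `max_len` characters of the prompt so the
// debug output stays manageable for very large requests.
static void log_prompt_preview(const json & prompt, size_t max_len = 512) {
    std::string s = prompt.is_string() ? prompt.get<std::string>() : prompt.dump();
    if (s.size() > max_len) {
        s.resize(max_len);
        s += " [...]";
    }
    SRV_DBG("Prompt preview: %s\n", s.c_str());
}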