Commit 3d7b19d

Reduce the verbose logging

1 parent a87e398 commit 3d7b19d

File tree: 4 files changed, +10 -6 lines changed

  src/llama-context.cpp
  src/llama-model-loader.cpp
  src/llama-model.cpp
  src/llama-vocab.cpp

src/llama-context.cpp

Lines changed: 2 additions & 2 deletions

@@ -96,7 +96,7 @@ llama_context::llama_context(
     cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch);
 
     const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
-
+/*
     LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);
     LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, cparams.n_ctx);
     LLAMA_LOG_INFO("%s: n_ctx_per_seq = %u\n", __func__, n_ctx_per_seq);
@@ -106,7 +106,7 @@ llama_context::llama_context(
     LLAMA_LOG_INFO("%s: flash_attn = %d\n", __func__, cparams.flash_attn);
     LLAMA_LOG_INFO("%s: freq_base = %.1f\n", __func__, cparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, cparams.rope_freq_scale);
-
+*/
     if (n_ctx_per_seq < hparams.n_ctx_train) {
         LLAMA_LOG_WARN("%s: n_ctx_per_seq (%u) < n_ctx_train (%u) -- the full capacity of the model will not be utilized\n",
                 __func__, n_ctx_per_seq, hparams.n_ctx_train);

src/llama-model-loader.cpp

Lines changed: 3 additions & 2 deletions

@@ -651,7 +651,7 @@ llama_model_loader::llama_model_loader(
         }
     }
 
-    LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
+    //LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
 
     for (int i = 0; i < n_kv; i++) {
         const char * name = gguf_get_key(meta.get(), i);
@@ -677,7 +677,7 @@ llama_model_loader::llama_model_loader(
                 continue;
             }
 
-            LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
+            //LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
         }
     }
 
@@ -1119,6 +1119,7 @@ std::string llama_model_loader::ftype_name() const {
 }
 
 void llama_model_loader::print_info() const {
+    return;
     LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
     LLAMA_LOG_INFO("%s: file type = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
     if (n_bytes < GiB) {

src/llama-model.cpp

Lines changed: 3 additions & 2 deletions

@@ -1458,12 +1458,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         const bool is_swa = il < (int) hparams.n_layer && hparams.is_swa(il);
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
-            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(cpu_dev), is_swa);
+            //LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(cpu_dev), is_swa);
             return {cpu_dev, &pimpl->cpu_buft_list};
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
-        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(dev), is_swa);
+        //LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(dev), is_swa);
         return {dev, &pimpl->gpu_buft_list.at(dev)};
     };
 
@@ -4144,6 +4144,7 @@ uint64_t llama_model::n_elements() const {
 }
 
 void llama_model::print_info() const {
+    return;
     const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);
 
     auto print_f = [](const std::function<uint32_t(uint32_t)> & f, uint32_t n) {

src/llama-vocab.cpp

Lines changed: 2 additions & 0 deletions

@@ -2731,6 +2731,7 @@ int32_t llama_vocab::impl::detokenize(
 }
 
 void llama_vocab::impl::print_info() const {
+    return;
     LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, type_name().c_str());
     LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, vocab.n_tokens());
     LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (uint32_t) bpe_ranks.size());
@@ -3055,6 +3056,7 @@ std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, boo
 }
 
 void llama_vocab::print_info() const {
+    return;
     pimpl->print_info();
 }
 
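Note: the diff above quiets the output by commenting out log calls and early-returning from the print_info() functions, which requires patching and rebuilding the library. A host application can get a similar effect without modifying these files by installing a filtering log callback through the public llama_log_set() hook. The following is a minimal sketch, not part of this commit, assuming the ggml_log_callback signature and GGML_LOG_LEVEL_* values declared in the llama.cpp / ggml headers.

// Minimal sketch (illustration only): drop INFO/DEBUG output from llama.cpp
// at the application level instead of editing the library sources.
// Assumes the public llama_log_set() hook and the ggml_log_callback
// signature declared in llama.h / ggml.h.
#include "llama.h"

#include <cstdio>

// Forward only warnings and errors to stderr; discard everything else.
static void quiet_log_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
    if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR) {
        fputs(text, stderr);
    }
}

int main() {
    // Install the filter before loading any model so the model loader,
    // vocab and context constructors all log through it.
    llama_log_set(quiet_log_callback, /*user_data=*/nullptr);

    // ... load the model and create the context as usual ...

    return 0;
}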