Commit 3d7b19d

Reduce the verbose logging

1 parent a87e398 commit 3d7b19d

File tree: 4 files changed, +10 -6 lines changed

  src/llama-context.cpp
  src/llama-model-loader.cpp
  src/llama-model.cpp
  src/llama-vocab.cpp

src/llama-context.cpp

Lines changed: 2 additions & 2 deletions

@@ -96,7 +96,7 @@ llama_context::llama_context(
     cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch);
 
     const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max;
-
+/*
     LLAMA_LOG_INFO("%s: n_seq_max = %u\n", __func__, cparams.n_seq_max);
     LLAMA_LOG_INFO("%s: n_ctx = %u\n", __func__, cparams.n_ctx);
     LLAMA_LOG_INFO("%s: n_ctx_per_seq = %u\n", __func__, n_ctx_per_seq);
@@ -106,7 +106,7 @@ llama_context::llama_context(
     LLAMA_LOG_INFO("%s: flash_attn = %d\n", __func__, cparams.flash_attn);
     LLAMA_LOG_INFO("%s: freq_base = %.1f\n", __func__, cparams.rope_freq_base);
     LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, cparams.rope_freq_scale);
-
+*/
     if (n_ctx_per_seq < hparams.n_ctx_train) {
         LLAMA_LOG_WARN("%s: n_ctx_per_seq (%u) < n_ctx_train (%u) -- the full capacity of the model will not be utilized\n",
                 __func__, n_ctx_per_seq, hparams.n_ctx_train);

src/llama-model-loader.cpp

Lines changed: 3 additions & 2 deletions

@@ -651,7 +651,7 @@ llama_model_loader::llama_model_loader(
         }
     }
 
-    LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
+    //LLAMA_LOG_INFO("%s: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n", __func__);
 
     for (int i = 0; i < n_kv; i++) {
         const char * name = gguf_get_key(meta.get(), i);
@@ -677,7 +677,7 @@ llama_model_loader::llama_model_loader(
                 continue;
             }
 
-            LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
+            //LLAMA_LOG_INFO("%s: - type %4s: %4d tensors\n", __func__, ggml_type_name(kv.first), kv.second);
         }
     }
 
@@ -1119,6 +1119,7 @@ std::string llama_model_loader::ftype_name() const {
 }
 
 void llama_model_loader::print_info() const {
+    return;
     LLAMA_LOG_INFO("%s: file format = %s\n", __func__, llama_file_version_name(fver));
     LLAMA_LOG_INFO("%s: file type = %s\n", __func__, llama_model_ftype_name(ftype).c_str());
     if (n_bytes < GiB) {

src/llama-model.cpp

Lines changed: 3 additions & 2 deletions

@@ -1458,12 +1458,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
         const bool is_swa = il < (int) hparams.n_layer && hparams.is_swa(il);
         if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
-            LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(cpu_dev), is_swa);
+            //LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(cpu_dev), is_swa);
             return {cpu_dev, &pimpl->cpu_buft_list};
         }
         const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
         auto * dev = devices.at(layer_gpu);
-        LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(dev), is_swa);
+        //LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s, is_swa = %d\n", il, ggml_backend_dev_name(dev), is_swa);
         return {dev, &pimpl->gpu_buft_list.at(dev)};
     };
 
@@ -4144,6 +4144,7 @@ uint64_t llama_model::n_elements() const {
 }
 
 void llama_model::print_info() const {
+    return;
     const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);
 
     auto print_f = [](const std::function<uint32_t(uint32_t)> & f, uint32_t n) {

src/llama-vocab.cpp

Lines changed: 2 additions & 0 deletions

@@ -2731,6 +2731,7 @@ int32_t llama_vocab::impl::detokenize(
 }
 
 void llama_vocab::impl::print_info() const {
+    return;
     LLAMA_LOG_INFO("%s: vocab type = %s\n", __func__, type_name().c_str());
     LLAMA_LOG_INFO("%s: n_vocab = %u\n", __func__, vocab.n_tokens());
     LLAMA_LOG_INFO("%s: n_merges = %u\n", __func__, (uint32_t) bpe_ranks.size());
@@ -3055,6 +3056,7 @@ std::string llama_vocab::detokenize(const std::vector<llama_token> & tokens, boo
 }
 
 void llama_vocab::print_info() const {
+    return;
     pimpl->print_info();
 }
 
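Note: the diff above quiets the output by commenting out log calls and early-returning from the print_info() functions, which requires patching and rebuilding the library. A host application can get a similar effect without modifying these files by installing a filtering log callback through the public llama_log_set() hook. The following is a minimal sketch, not part of this commit, assuming the ggml_log_callback signature and GGML_LOG_LEVEL_* values declared in the llama.cpp / ggml headers.

// Minimal sketch (illustration only): drop INFO/DEBUG output from llama.cpp
// at the application level instead of editing the library sources.
// Assumes the public llama_log_set() hook and the ggml_log_callback
// signature declared in llama.h / ggml.h.
#include "llama.h"

#include <cstdio>

// Forward only warnings and errors to stderr; discard everything else.
static void quiet_log_callback(enum ggml_log_level level, const char * text, void * /*user_data*/) {
    if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR) {
        fputs(text, stderr);
    }
}

int main() {
    // Install the filter before loading any model so the model loader,
    // vocab and context constructors all log through it.
    llama_log_set(quiet_log_callback, /*user_data=*/nullptr);

    // ... load the model and create the context as usual ...

    return 0;
}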