
Commit 19a0f05

Some logging back
1 parent a56ecd2 commit 19a0f05

2 files changed: +59 -10 lines changed

src/llama-model-loader.cpp

Lines changed: 1 addition & 1 deletion
@@ -484,7 +484,7 @@ llama_model_loader::llama_model_loader(const std::string & fname, bool use_mmap,
 
     // determine file type based on the number of tensors for each quantization and print meta data
     // TODO: make optional
-    if(false) //disable this log for now
+    // if(false) //disable this log for now
     {
         std::map<enum ggml_type, uint32_t> n_type;
 
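Note on the change above: commenting out the if(false) guard leaves the braces that follow as a bare block scope, so the logging inside it now runs unconditionally, which matches the commit message ("Some logging back"). A minimal standalone sketch of the pattern, with a printf standing in for the real tensor-type logging:

    #include <cstdio>

    int main() {
        // if(false) // guard commented out: the braces below are now a plain
        {             // block scope, so the body always executes
            printf("quantization/tensor-type stats would be logged here\n");
        }
        return 0;
    }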
src/llama-model.cpp

Lines changed: 58 additions & 9 deletions
@@ -1952,20 +1952,69 @@ void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     }
 
     LLAMA_LOG_INFO("%s: model type = %s\n", __func__, llama_model_type_name(model).c_str());
-    LLAMA_LOG_INFO("%s: model ftype = %s\n", __func__, llama_model_ftype_name(model).c_str());
+    LLAMA_LOG_INFO("%s: model ftype = %s\n", __func__, llama_model_type_name(model).c_str());
     if (ml.n_elements >= 1e12) {
-        LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
+        LLAMA_LOG_INFO("%s: model params = %.3f T\n", __func__, ml.n_elements*1e-12);
     } else if (ml.n_elements >= 1e9) {
-        LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
+        LLAMA_LOG_INFO("%s: model params = %.3f B\n", __func__, ml.n_elements*1e-9);
     } else if (ml.n_elements >= 1e6) {
-        LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
+        LLAMA_LOG_INFO("%s: model params = %.3f M\n", __func__, ml.n_elements*1e-6);
     } else {
-        LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
+        LLAMA_LOG_INFO("%s: model params = %.3f K\n", __func__, ml.n_elements*1e-3);
+        // LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
+    // } else if (ml.n_elements >= 1e9) {
+        // LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
+    // } else if (ml.n_elements >= 1e6) {
+        // LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
+    // } else {
+        // LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
     }
-    if (ml.n_bytes < GiB) {
-        LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
-    } else {
-        LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+
+    LLAMA_LOG_INFO("%s: model size = %.2f Bytes (%.3f BPW) \n", __func__, ml.n_bytes/1.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f KB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f KiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f MB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f GB (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size = %.2f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+
+    // if (ml.n_bytes < GiB) {
+        // LLAMA_LOG_INFO("%s: model size = %.3f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    // } else {
+        // LLAMA_LOG_INFO("%s: model size = %.3f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    // }
+
+    {
+        auto n_bytes = ml.n_bytes;
+        auto n_elements = ml.n_elements;
+        auto meta_tke = ml.get_tensor_meta("token_embd.weight");
+        auto meta_out = ml.get_tensor_meta("output.weight");
+        if (meta_tke && meta_out) {
+            n_bytes -= ggml_nbytes(meta_tke);
+            n_elements -= ggml_nelements(meta_tke);
+            n_bytes -= ggml_nbytes(meta_out);
+            n_elements -= ggml_nelements(meta_out);
+
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f Bytes (%.3f BPW) \n", __func__, n_bytes/1.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f KB (%.3f BPW) \n", __func__, n_bytes/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f KiB (%.3f BPW) \n", __func__, n_bytes/1024.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f MB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f MiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f GB (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers = %.2f GiB (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+
+            // if (n_bytes < GiB) {
+                // LLAMA_LOG_INFO("%s: repeating layers = %.3f MiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
+            // } else {
+                // LLAMA_LOG_INFO("%s: repeating layers = %.3f GiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+            // }
+
+            if (ml.n_elements >= 1e9) {
+                LLAMA_LOG_INFO(", %.3f B parameters)\n", n_elements*1e-9);
+            } else {
+                LLAMA_LOG_INFO(", %.3f M parameters)\n", n_elements*1e-6);
+            }
+        }
+    }
     }
 
     // general kv
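All of the size lines added above derive from two quantities: ml.n_bytes (total tensor data in the file) and ml.n_elements (total parameter count). BPW (bits per weight) is simply n_bytes*8.0/n_elements, and the KB/MB/GB versus KiB/MiB/GiB variants differ only in dividing by powers of 1000 versus 1024. The "repeating layers" block applies the same arithmetic after subtracting the token_embd.weight and output.weight tensors, so its BPW reflects the quantization of the per-layer weights alone. A standalone sketch of that arithmetic, using made-up example values (not taken from any real model):

    #include <cstdint>
    #include <cstdio>

    int main() {
        // hypothetical totals for illustration only
        uint64_t n_bytes    = 4368439296ULL; // tensor data on disk
        uint64_t n_elements = 8030261248ULL; // parameter count

        // hypothetical sizes of the two non-repeating tensors
        uint64_t tke_bytes = 1050673152ULL, tke_elements = 525336576ULL;
        uint64_t out_bytes = 1050673152ULL, out_elements = 525336576ULL;

        // bits per weight = total bits / total parameters
        printf("model size = %.2f GB (%.3f BPW)\n",
               n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
        printf("model size = %.2f GiB (%.3f BPW)\n",
               n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);

        // same arithmetic with the embedding and output tensors excluded
        uint64_t r_bytes    = n_bytes - tke_bytes - out_bytes;
        uint64_t r_elements = n_elements - tke_elements - out_elements;
        printf("repeating layers = %.2f GiB (%.3f BPW)\n",
               r_bytes/1024.0/1024.0/1024.0, r_bytes*8.0/r_elements);
        return 0;
    }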
