@@ -1952,20 +1952,69 @@ void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
19521952 }
19531953
19541954 LLAMA_LOG_INFO (" %s: model type = %s\n " , __func__, llama_model_type_name (model).c_str ());
1955- LLAMA_LOG_INFO (" %s: model ftype = %s\n " , __func__, llama_model_ftype_name (model).c_str ());
1955+ LLAMA_LOG_INFO (" %s: model ftype = %s\n " , __func__, llama_model_type_name (model).c_str ());
19561956 if (ml.n_elements >= 1e12 ) {
1957- LLAMA_LOG_INFO (" %s: model params = %.2f T\n " , __func__, ml.n_elements *1e-12 );
1957+ LLAMA_LOG_INFO (" %s: model params = %.3f T\n " , __func__, ml.n_elements *1e-12 );
19581958 } else if (ml.n_elements >= 1e9 ) {
1959- LLAMA_LOG_INFO (" %s: model params = %.2f B\n " , __func__, ml.n_elements *1e-9 );
1959+ LLAMA_LOG_INFO (" %s: model params = %.3f B\n " , __func__, ml.n_elements *1e-9 );
19601960 } else if (ml.n_elements >= 1e6 ) {
1961- LLAMA_LOG_INFO (" %s: model params = %.2f M\n " , __func__, ml.n_elements *1e-6 );
1961+ LLAMA_LOG_INFO (" %s: model params = %.3f M\n " , __func__, ml.n_elements *1e-6 );
19621962 } else {
1963- LLAMA_LOG_INFO (" %s: model params = %.2f K\n " , __func__, ml.n_elements *1e-3 );
1963+ LLAMA_LOG_INFO (" %s: model params = %.3f K\n " , __func__, ml.n_elements *1e-3 );
1964+ // LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, ml.n_elements*1e-12);
1965+ // } else if (ml.n_elements >= 1e9) {
1966+ // LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
1967+ // } else if (ml.n_elements >= 1e6) {
1968+ // LLAMA_LOG_INFO("%s: model params = %.2f M\n", __func__, ml.n_elements*1e-6);
1969+ // } else {
1970+ // LLAMA_LOG_INFO("%s: model params = %.2f K\n", __func__, ml.n_elements*1e-3);
19641971 }
1965- if (ml.n_bytes < GiB) {
1966- LLAMA_LOG_INFO (" %s: model size = %.2f MiB (%.2f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1967- } else {
1968- LLAMA_LOG_INFO (" %s: model size = %.2f GiB (%.2f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1972+
1973+ LLAMA_LOG_INFO (" %s: model size = %.2f Bytes (%.3f BPW) \n " , __func__, ml.n_bytes /1.0 , ml.n_bytes *8.0 /ml.n_elements );
1974+ LLAMA_LOG_INFO (" %s: model size = %.2f KB (%.3f BPW) \n " , __func__, ml.n_bytes /1000.0 , ml.n_bytes *8.0 /ml.n_elements );
1975+ LLAMA_LOG_INFO (" %s: model size = %.2f KiB (%.3f BPW) \n " , __func__, ml.n_bytes /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1976+ LLAMA_LOG_INFO (" %s: model size = %.2f MB (%.3f BPW) \n " , __func__, ml.n_bytes /1000.0 /1000.0 , ml.n_bytes *8.0 /ml.n_elements );
1977+ LLAMA_LOG_INFO (" %s: model size = %.2f MiB (%.3f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1978+ LLAMA_LOG_INFO (" %s: model size = %.2f GB (%.3f BPW) \n " , __func__, ml.n_bytes /1000.0 /1000.0 /1000.0 , ml.n_bytes *8.0 /ml.n_elements );
1979+ LLAMA_LOG_INFO (" %s: model size = %.2f GiB (%.3f BPW) \n " , __func__, ml.n_bytes /1024.0 /1024.0 /1024.0 , ml.n_bytes *8.0 /ml.n_elements );
1980+
1981+ // if (ml.n_bytes < GiB) {
1982+ // LLAMA_LOG_INFO("%s: model size = %.3f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
1983+ // } else {
1984+ // LLAMA_LOG_INFO("%s: model size = %.3f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
1985+ // }
1986+
1987+ {
1988+ auto n_bytes = ml.n_bytes ;
1989+ auto n_elements = ml.n_elements ;
1990+ auto meta_tke = ml.get_tensor_meta (" token_embd.weight" );
1991+ auto meta_out = ml.get_tensor_meta (" output.weight" );
1992+ if (meta_tke && meta_out) {
1993+ n_bytes -= ggml_nbytes (meta_tke);
1994+ n_elements -= ggml_nelements (meta_tke);
1995+ n_bytes -= ggml_nbytes (meta_out);
1996+ n_elements -= ggml_nelements (meta_out);
1997+
1998+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f Bytes (%.3f BPW) \n " , __func__, n_bytes/1.0 , n_bytes*8.0 /n_elements);
1999+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f KB (%.3f BPW) \n " , __func__, n_bytes/1000.0 , n_bytes*8.0 /n_elements);
2000+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f KiB (%.3f BPW) \n " , __func__, n_bytes/1024.0 , n_bytes*8.0 /n_elements);
2001+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f MB (%.3f BPW) \n " , __func__, n_bytes/1000.0 /1000.0 , n_bytes*8.0 /n_elements);
2002+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f MiB (%.3f BPW) \n " , __func__, n_bytes/1024.0 /1024.0 , n_bytes*8.0 /n_elements);
2003+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f GB (%.3f BPW) \n " , __func__, n_bytes/1000.0 /1000.0 /1000.0 , n_bytes*8.0 /n_elements);
2004+ LLAMA_LOG_INFO (" %s: repeating layers = %.2f GiB (%.3f BPW) \n " , __func__, n_bytes/1024.0 /1024.0 /1024.0 , n_bytes*8.0 /n_elements);
2005+
2006+ // if (n_bytes < GiB) {
2007+ // LLAMA_LOG_INFO("%s: repeating layers = %.3f MiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0, n_bytes*8.0/n_elements);
2008+ // } else {
2009+ // LLAMA_LOG_INFO("%s: repeating layers = %.3f GiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
2010+ // }
2011+
2012+ if (ml.n_elements >= 1e9 ) {
2013+ LLAMA_LOG_INFO (" , %.3f B parameters)\n " , n_elements*1e-9 );
2014+ } else {
2015+ LLAMA_LOG_INFO (" , %.3f M parameters)\n " , n_elements*1e-6 );
2016+ }
2017+ }
19692018 }
19702019
19712020 // general kv
0 commit comments