@@ -927,6 +927,7 @@ enum e_model {
 
 static const size_t kB = 1024;
 static const size_t MB = kB*kB;
+static const size_t GB = kB*kB*kB;
 
 // default hparams (LLaMA 7B)
 struct llama_hparams {
@@ -1280,6 +1281,7 @@ struct llama_model_loader {
     int n_created = 0;
 
     int64_t n_elements = 0;
+    size_t  n_bytes    = 0;
 
     bool use_mmap = false;
 
@@ -1312,6 +1314,7 @@ struct llama_model_loader {
             const char * name = gguf_get_tensor_name(ctx_gguf, i);
             struct ggml_tensor * t = ggml_get_tensor(ctx_meta, name);
             n_elements += ggml_nelements(t);
+            n_bytes    += ggml_nbytes(t);
         }
 
         LLAMA_LOG_INFO("%s: loaded meta data with %d key-value pairs and %d tensors from %s (version %s)\n",
@@ -1909,7 +1912,12 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     LLAMA_LOG_INFO("%s: freq_scale   = %g\n",     __func__, hparams.rope_freq_scale);
     LLAMA_LOG_INFO("%s: model type   = %s\n",     __func__, llama_model_type_name(model.type));
     LLAMA_LOG_INFO("%s: model ftype  = %s\n",     __func__, llama_model_ftype_name(model.ftype).c_str());
-    LLAMA_LOG_INFO("%s: model size   = %.2f B\n", __func__, ml.n_elements*1e-9);
+    LLAMA_LOG_INFO("%s: model params = %.2f B\n", __func__, ml.n_elements*1e-9);
+    if (ml.n_bytes < GB) {
+        LLAMA_LOG_INFO("%s: model size   = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    } else {
+        LLAMA_LOG_INFO("%s: model size   = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    }
 
     // general kv
     LLAMA_LOG_INFO("%s: general.name = %s\n", __func__, model.name.c_str());
@@ -3495,7 +3503,7 @@ static struct ggml_cgraph * llm_build_starcoder(
 
         ggml_allocr_alloc(lctx.alloc, token);
         if (!ggml_allocr_is_measure(lctx.alloc)) {
-            memcpy(token->data, embd, N * n_embd * ggml_element_size(inpL));
+            memcpy(token->data, embd, N * n_embd * ggml_element_size(token));
         }
     }
 
35013509
0 commit comments