Bring some logging back

Nexesenex · Nexesenex · commit 19a0f0586036 · 2025-01-13T18:34:05.000+01:00
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
@@ -484,7 +484,7 @@ llama_model_loader::llama_model_loader(const std::string & fname, bool use_mmap,
 
     // determine file type based on the number of tensors for each quantization and print meta data
     // TODO: make optional
-    if(false) //disable this log for now
+    // if(false) //disable this log for now
     {
         std::map<enum ggml_type, uint32_t> n_type;
 
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -1952,20 +1952,69 @@ void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     }
 
     LLAMA_LOG_INFO("%s: model type       = %s\n",     __func__, llama_model_type_name(model).c_str());
-    LLAMA_LOG_INFO("%s: model ftype      = %s\n",     __func__, llama_model_ftype_name(model).c_str());
+    LLAMA_LOG_INFO("%s: model ftype      = %s\n",     __func__, llama_model_type_name(model).c_str());
     if (ml.n_elements >= 1e12) {
-        LLAMA_LOG_INFO("%s: model params     = %.2f T\n", __func__, ml.n_elements*1e-12);
+        LLAMA_LOG_INFO("%s: model params     = %.3f T\n", __func__, ml.n_elements*1e-12);
     } else if (ml.n_elements >= 1e9) {
-        LLAMA_LOG_INFO("%s: model params     = %.2f B\n", __func__, ml.n_elements*1e-9);
+        LLAMA_LOG_INFO("%s: model params     = %.3f B\n", __func__, ml.n_elements*1e-9);
     } else if (ml.n_elements >= 1e6) {
-        LLAMA_LOG_INFO("%s: model params     = %.2f M\n", __func__, ml.n_elements*1e-6);
+        LLAMA_LOG_INFO("%s: model params     = %.3f M\n", __func__, ml.n_elements*1e-6);
     } else {
-        LLAMA_LOG_INFO("%s: model params     = %.2f K\n", __func__, ml.n_elements*1e-3);
+        LLAMA_LOG_INFO("%s: model params     = %.3f K\n", __func__, ml.n_elements*1e-3);
+        // LLAMA_LOG_INFO("%s: model params     = %.2f T\n", __func__, ml.n_elements*1e-12);
+    // } else if (ml.n_elements >= 1e9) {
+        // LLAMA_LOG_INFO("%s: model params     = %.2f B\n", __func__, ml.n_elements*1e-9);
+    // } else if (ml.n_elements >= 1e6) {
+        // LLAMA_LOG_INFO("%s: model params     = %.2f M\n", __func__, ml.n_elements*1e-6);
+    // } else {
+        // LLAMA_LOG_INFO("%s: model params     = %.2f K\n", __func__, ml.n_elements*1e-3);
     }
-    if (ml.n_bytes < GiB) {
-        LLAMA_LOG_INFO("%s: model size       = %.2f MiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0,        ml.n_bytes*8.0/ml.n_elements);
-    } else {
-        LLAMA_LOG_INFO("%s: model size       = %.2f GiB (%.2f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+
+    LLAMA_LOG_INFO("%s: model size       =   %.2f Bytes (%.3f BPW) \n", __func__, ml.n_bytes/1.0,                    ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =      %.2f KB    (%.3f BPW) \n", __func__, ml.n_bytes/1000.0,               ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =      %.2f KiB   (%.3f BPW) \n", __func__, ml.n_bytes/1024.0,               ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =         %.2f MB    (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0,        ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =         %.2f MiB   (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0,        ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =            %.2f GB    (%.3f BPW) \n", __func__, ml.n_bytes/1000.0/1000.0/1000.0, ml.n_bytes*8.0/ml.n_elements);
+    LLAMA_LOG_INFO("%s: model size       =            %.2f GiB   (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+
+    // if (ml.n_bytes < GiB) {
+        // LLAMA_LOG_INFO("%s: model size       = %.3f MiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0,        ml.n_bytes*8.0/ml.n_elements);
+    // } else {
+        // LLAMA_LOG_INFO("%s: model size       = %.3f GiB (%.3f BPW) \n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    // }
+
+    {
+        auto n_bytes = ml.n_bytes;
+        auto n_elements = ml.n_elements;
+        auto meta_tke = ml.get_tensor_meta("token_embd.weight");
+        auto meta_out = ml.get_tensor_meta("output.weight");
+        if (meta_tke && meta_out) {
+            n_bytes -= ggml_nbytes(meta_tke);
+            n_elements -= ggml_nelements(meta_tke);
+            n_bytes -= ggml_nbytes(meta_out);
+            n_elements -= ggml_nelements(meta_out);
+			
+            LLAMA_LOG_INFO("%s: repeating layers =   %.2f Bytes (%.3f BPW) \n", __func__, n_bytes/1.0,                    n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =      %.2f KB    (%.3f BPW) \n", __func__, n_bytes/1000.0,               n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =      %.2f KiB   (%.3f BPW) \n", __func__, n_bytes/1024.0,               n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =         %.2f MB    (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0,        n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =         %.2f MiB   (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0,        n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =            %.2f GB    (%.3f BPW) \n", __func__, n_bytes/1000.0/1000.0/1000.0, n_bytes*8.0/n_elements);
+            LLAMA_LOG_INFO("%s: repeating layers =            %.2f GiB   (%.3f BPW) \n", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+			
+            // if (n_bytes < GiB) {
+                // LLAMA_LOG_INFO("%s: repeating layers = %.3f MiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0,        n_bytes*8.0/n_elements);
+            // } else {
+                // LLAMA_LOG_INFO("%s: repeating layers = %.3f GiB (%.3f BPW", __func__, n_bytes/1024.0/1024.0/1024.0, n_bytes*8.0/n_elements);
+            // }
+
+            if (ml.n_elements >= 1e9) {
+                LLAMA_LOG_INFO(", %.3f B parameters)\n", n_elements*1e-9);
+            } else {
+                LLAMA_LOG_INFO(", %.3f M parameters)\n", n_elements*1e-6);
+            }
+        }
     }
 
     // general kv

Original file line number	Diff line number	Diff line change
`@@ -484,7 +484,7 @@ llama_model_loader::llama_model_loader(const std::string & fname, bool use_mmap,`
`484`	`484`
`485`	`485`	`// determine file type based on the number of tensors for each quantization and print meta data`
`486`	`486`	`// TODO: make optional`
`487`		`- if(false) //disable this log for now`
	`487`	`+ // if(false) //disable this log for now`
`488`	`488`	`{`
`489`	`489`	`std::map<enum ggml_type, uint32_t> n_type;`
`490`	`490`