Add --activation-statistics parameter

EAddario · EAddario · commit 5bb2def02dcf · 2025-08-07T17:41:21.000+01:00
diff --git a/common/arg.cpp b/common/arg.cpp
@@ -2707,6 +2707,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.show_statistics = true;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--activation-statistics"},
+        string_format("generate data to compute activation-based statistics (default: %s)", params.activation_statistics ? "true" : "false"),
+        [](common_params & params) { // flag takes no value: its presence sets activation_statistics
+            params.activation_statistics = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--parse-special"},
         string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
diff --git a/common/common.h b/common/common.h
@@ -443,10 +443,11 @@ struct common_params {
     int32_t i_chunk     =  0; // start processing from this chunk
     int8_t  imat_dat    =  0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)
 
-    bool process_output  = false; // collect data for the output tensor
-    bool compute_ppl     = true;  // whether to compute perplexity
-    bool show_statistics = false; // show imatrix statistics per tensor
-    bool parse_special   = false; // whether to parse special tokens during imatrix tokenization
+    bool process_output         = false; // collect data for the output tensor
+    bool compute_ppl            = true;  // whether to compute perplexity
+    bool show_statistics        = false; // show imatrix statistics per tensor
+    bool activation_statistics  = false; // generate data to calculate activation based statistics
+    bool parse_special          = false; // whether to parse special tokens during imatrix tokenization
 
     // cvector-generator params
     int n_pca_batch = 100;
diff --git a/tools/imatrix/imatrix.cpp b/tools/imatrix/imatrix.cpp
@@ -30,7 +30,7 @@ static void print_usage(int, char ** argv) {
             "       -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
             "       [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
             "       [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
-            "       [--show-statistics] [...]\n" , argv[0]);
+            "       [--activation-statistics] [--show-statistics] [...]\n" , argv[0]);
     LOG("\n");
 }
 
@@ -428,6 +428,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             // broadcast, when loading an old imatrix
             e.counts.resize(n_as, e.counts[0]);
         }
+        // TODO: find an efficient way to implement --activation-statistics so the imatrix size is not doubled by default
         if (e.values.empty()) {
             e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);