1616#include < fstream>
1717#include < unordered_map>
1818#include < map>
19+ #include < regex>
20+ #include < numeric>
1921
2022#if defined(_MSC_VER)
2123#pragma warning(disable: 4244 4267) // possible loss of data
2426static void print_usage (int , char ** argv) {
2527 LOG (" \n example usage:\n " );
2628 LOG (" \n %s \\\n "
27- " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] \\\n "
28- " [--no-ppl] [-- chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n "
29- " [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n "
30- " [--parse-special]\n " , argv[0 ]);
29+ " -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] [--no-ppl] \\\n "
30+ " [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics ] \\\n "
31+ " [--no-ppl] [-- in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n "
32+ " [--parse-special] [...] \n " , argv[0 ]);
3133 LOG (" \n " );
3234}
3335
@@ -40,6 +42,21 @@ struct Stats {
4042 std::vector<int64_t > counts;
4143};
4244
45+ struct tensor_statistics {
46+ std::string tensor;
47+ Stats stats;
48+ float total_sqract = 0 .0f ;
49+ float mean_sqract = 0 .0f ;
50+ float max_sqract = 0 .0f ;
51+ float min_sqract = 0 .0f ;
52+ int elements = 0 ;
53+ float stddev = 0 .0f ;
54+ float active = 0 .0f ;
55+ float entropy = 0 .0f ;
56+ float zd = 0 .0f ;
57+ float cossim = 0 .0f ;
58+ };
59+
4360class IMatrixCollector {
4461public:
4562 IMatrixCollector () = default ;
@@ -49,6 +66,7 @@ class IMatrixCollector {
4966 void save_imatrix (int32_t n_chunk = -1 ) const ;
5067 bool load_imatrix_legacy (const char * fname);
5168 bool load_imatrix (const char * file_name);
69+ const std::unordered_map<std::string, Stats> & get_mstats () const { return m_stats; }
5270private:
5371 std::unordered_map<std::string, Stats> m_stats;
5472 common_params m_params;
@@ -78,6 +96,127 @@ static std::string filter_tensor_name(const char * name) {
7896 return wname;
7997}
8098
// Splits a dot-separated tensor name into a layer label and a short tensor label.
//
//   input  - full tensor name, e.g. "blk.12.attn_q.weight"
//   layer  - out: the component that follows the first "blk" component, or "-" when there is none
//   tensor - out: the component that precedes the first non-leading "weight" component,
//            or the whole input when there is none
//
// The outputs are always overwritten, so a string reused across calls never keeps the
// result of an earlier call.
static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
    std::vector<std::string> name;
    std::istringstream stream(input);
    std::string item;

    while (std::getline(stream, item, '.')) {
        name.push_back(item);
    }

    // Results are gathered in locals and only written to the out-parameters at the end,
    // which also keeps the function safe if an out-parameter aliases `input`.
    std::string found_layer;
    std::string found_tensor;

    for (size_t i = 0; i + 1 < name.size(); ++i) {
        if (name[i] == "blk") {
            found_layer = name[i + 1];
            break;
        }
    }
    for (size_t i = 1; i < name.size(); ++i) {
        if (name[i] == "weight") {
            found_tensor = name[i - 1];
            break;
        }
    }

    // `tensor` is assigned first because its fallback still needs to read `input`.
    if (found_tensor.empty()) {
        tensor = input;
    } else {
        tensor = found_tensor;
    }
    if (found_layer.empty()) {
        layer = "-";
    } else {
        layer = found_layer;
    }
}
127+
128+ static void compute_statistics (std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
129+ // if (e.values.size() != e.counts.size()) {
130+ if (e.values .size () % e.counts .size () != 0 ) {
131+ LOG_ERR (" %s: activation size mismatch for tensor %s (%zu vs %zu)\n " , __func__, name.c_str (), e.counts .size (), e.values .size ());
132+ return ;
133+ }
134+ if (e.counts .empty ()) {
135+ LOG_ERR (" %s: there are no activations for tensor %s. The imatrix may be suboptimal\n " , __func__, name.c_str ());
136+ return ;
137+ }
138+
139+ const int n_mat = e.counts .size ();
140+ const int row_size = e.values .size () / n_mat;
141+
142+ std::vector<float > activations;
143+ activations.reserve (e.values .size ());
144+
145+ for (int i = 0 ; i < n_mat; ++i) {
146+ for (int j = 0 ; j < row_size; ++j) {
147+ activations.push_back (e.values [i*row_size + j] / e.counts [i]);
148+ }
149+ }
150+
151+ const float act_total = std::accumulate (activations.begin (), activations.end (), 0 .0f );
152+ const float act_max = *std::max_element (activations.begin (), activations.end ());
153+ const float act_min = *std::min_element (activations.begin (), activations.end ());
154+ const float act_mean = act_total / activations.size ();
155+ const float act_sqr_total = std::inner_product (activations.begin (), activations.end (), activations.begin (), 0 .0f );
156+ const float act_var = (act_sqr_total / activations.size ()) - (act_mean * act_mean);
157+ const float act_dev = std::sqrt (std::max (0 .0f , act_var));
158+ float threshold = 1e-5f ;
159+ const int inactive_count = std::count_if (activations.begin (), activations.end (),
160+ [threshold](const float v) { return fabsf (v) <= threshold; });
161+ const float active_ratio = 1 - static_cast <float >(inactive_count) / activations.size ();
162+
163+ float entropy = 0 ;
164+ if (act_total > 0 ) {
165+ for (const auto act : activations) {
166+ if (const float p = act / act_total; p > 0 ) {
167+ entropy -= p * std::log2 (p);
168+ }
169+ }
170+ }
171+
172+ int z_score = 0 ;
173+ if (act_dev > 0 .0f ) {
174+ for (const auto act : activations) {
175+ if (const float p = (act - act_mean) / act_dev; p > 1 ) {
176+ z_score++;
177+ }
178+ }
179+ }
180+
181+ auto & ts = tstats.emplace_back ();
182+ ts.tensor = name;
183+ ts.stats = e;
184+ ts.total_sqract = act_total;
185+ ts.mean_sqract = act_mean;
186+ ts.max_sqract = act_max;
187+ ts.min_sqract = act_min;
188+ ts.elements = static_cast <int >(activations.size ());
189+ ts.stddev = act_dev;
190+ ts.active = active_ratio;
191+ ts.entropy = entropy;
192+ ts.zd = static_cast <float >(z_score) / ts.elements ;
193+ }
194+
195+ static void compute_cossim (std::vector<tensor_statistics> & tstats) {
196+ static const std::regex pattern (R"( blk\.(\d+)\.)" );
197+ for (auto & ts : tstats) {
198+ if (std::smatch match; std::regex_search (ts.tensor , match, pattern)) {
199+ const int blk = std::stoi (match[1 ]);
200+ std::string tname (ts.tensor );
201+ tname.replace (match.position (1 ), match.length (1 ), std::to_string (blk-1 ));
202+ auto prev = std::find_if (tstats.begin (), tstats.end (),
203+ [tname](const tensor_statistics & t) { return t.tensor == tname; });
204+ if (prev != tstats.end ()) {
205+ const float dp = std::inner_product (ts.stats .values .begin (), ts.stats .values .end (),
206+ prev->stats .values .begin (), 0 .0f );
207+ const float curr_mag = std::sqrt (std::inner_product (ts.stats .values .begin (), ts.stats .values .end (),
208+ ts.stats .values .begin (), 0 .0f ));
209+ const float prev_mag = std::sqrt (std::inner_product (prev->stats .values .begin (), prev->stats .values .end (),
210+ prev->stats .values .begin (), 0 .0f ));
211+ const float cs = dp / (curr_mag * prev_mag);
212+ ts.cossim = cs;
213+ }
214+ } else {
215+ ts.cossim = 0 ;
216+ }
217+ }
218+ }
219+
81220bool IMatrixCollector::collect_imatrix (struct ggml_tensor * t, bool ask, void * user_data) {
82221 GGML_UNUSED (user_data);
83222
@@ -678,7 +817,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
678817 return g_collector.collect_imatrix (t, ask, user_data);
679818}
680819
681-
682820struct results_log_softmax {
683821 double log_softmax;
684822 float logit;
@@ -926,6 +1064,113 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
9261064 return true ;
9271065}
9281066
1067+ static bool show_statistics (const common_params & params) {
1068+ std::vector<tensor_statistics> ts;
1069+ if (params.in_files .empty () || params.in_files .size () > 1 ) {
1070+ LOG_ERR (" \n Error: a single imatrix file is required to compute tensor statistics\n\n " );
1071+ return false ;
1072+ }
1073+ if (g_collector.load_imatrix (params.in_files [0 ].c_str ())) {
1074+ for (const auto & [name, stats] :g_collector.get_mstats ()) {
1075+ compute_statistics (ts, name, stats);
1076+ }
1077+ } else {
1078+ LOG_ERR (" \n Error: %s is not a valid imatrix file\n\n " , params.in_files [0 ].c_str ());
1079+ return false ;
1080+ }
1081+ if (!ts.empty ()) {
1082+ compute_cossim (ts);
1083+ } else {
1084+ LOG_ERR (" Error: cannot compute statistics for %s\n\n " , params.in_files [0 ].c_str ());
1085+ return false ;
1086+ }
1087+
1088+ struct tensor_comparer {
1089+ bool operator ()(const tensor_statistics & a, const tensor_statistics & b) const {
1090+ std::string layer, name_a, name_b;
1091+ ;
1092+ process_tensor_name (a.tensor , layer, name_a);
1093+ process_tensor_name (b.tensor , layer, name_b);
1094+ return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract );
1095+ }
1096+ };
1097+ std::sort (ts.begin (), ts.end (), tensor_comparer ());
1098+
1099+ struct weighted_stats {
1100+ float weighted_bias = 0 .0f ;
1101+ float weighted_zd = 0 .0f ;
1102+ float weighted_cossim = 0 .0f ;
1103+ int total_elements = 0 ;
1104+ };
1105+ std::map<int , weighted_stats> ws;
1106+
1107+ LOG_INF (" \n Computing statistics for %s (%d tensors)\n " , params.in_files [0 ].c_str (), static_cast <int >(ts.size ()));
1108+ LOG_INF (" \n %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\t %s\n " , " Layer" , " Tensor" , " Σ(Act²)" ,
1109+ " Min" , " Max" , " μ" , " σ" , " % Active" , " N" , " Entropy" , " E (norm)" , " ZD" ,
1110+ " CosSim" );
1111+ LOG_INF (
1112+ " =============================================================================================================="
1113+ " ===========================================================\n " );
1114+ for (const auto & tstat : ts) {
1115+ std::string layer, name;
1116+ process_tensor_name (tstat.tensor , layer, name);
1117+
1118+ int blk;
1119+ try {
1120+ blk = std::stoi (layer);
1121+ } catch (const std::exception & e) {
1122+ blk = -1 ; // not a block layer
1123+ }
1124+
1125+ LOG_INF (" %5s\t %-20s\t %10.2f\t %8.4f\t %11.4f\t %6.2f\t %6.2f\t %8.2f%%\t %6d\t %10.4f\t %6.2f%%\t %10.2f%%\t %8.4f\n " ,
1126+ layer.c_str (), name.c_str (), tstat.total_sqract , tstat.min_sqract , tstat.max_sqract , tstat.mean_sqract ,
1127+ tstat.stddev , tstat.active * 100 .0f , tstat.elements , tstat.entropy ,
1128+ 100 .0f * (tstat.entropy / std::log2 (tstat.elements )), 100 .0f * tstat.zd , tstat.cossim );
1129+
1130+ const float weighted_bias = tstat.elements * tstat.total_sqract ;
1131+ const float weighted_zd = tstat.elements * tstat.zd ;
1132+ const float weighted_cossim = tstat.elements * tstat.cossim ;
1133+
1134+ if (ws.find (blk) != ws.end ()) {
1135+ ws[blk].weighted_bias += weighted_bias;
1136+ ws[blk].weighted_zd += weighted_zd;
1137+ ws[blk].weighted_cossim += weighted_cossim;
1138+ ws[blk].total_elements += tstat.elements ;
1139+ } else {
1140+ weighted_stats temp_ws;
1141+ temp_ws.weighted_bias = weighted_bias;
1142+ temp_ws.weighted_zd = weighted_zd;
1143+ temp_ws.weighted_cossim = weighted_cossim;
1144+ temp_ws.total_elements = tstat.elements ;
1145+ ws[blk] = temp_ws;
1146+ }
1147+ }
1148+
1149+ const int layers = std::count_if (ws.begin (), ws.end (), [](const auto & kv) { return kv.first >= 0 ; });
1150+ LOG_INF (" \n Computing weighted average statistics per layer (%d layers)\n " , layers);
1151+ LOG_INF (" \n %s\t %s\t %s\t %s\n " , " Layer" , " μΣ(Act²)" , " μZD" , " μCosSim" );
1152+ LOG_INF (" ================================================\n " );
1153+ for (const auto & [first, second] : ws) {
1154+ const auto & layer = first;
1155+ const auto & stats = second;
1156+
1157+ if (stats.total_elements == 0 ) {
1158+ continue ;
1159+ }
1160+
1161+ if (layer >= 0 ) {
1162+ const float bias = stats.weighted_bias / stats.total_elements ;
1163+ const float zd = stats.weighted_zd / stats.total_elements ;
1164+ const float cossim = stats.weighted_cossim / stats.total_elements ;
1165+
1166+ LOG_INF (" %5d\t %14.2f\t %10.4f%%\t %6.4f\n " , layer, bias, 100 .0f * zd, cossim);
1167+ }
1168+ }
1169+ LOG_INF (" \n " );
1170+
1171+ return true ;
1172+ }
1173+
9291174int main (int argc, char ** argv) {
9301175 common_params params;
9311176
@@ -938,6 +1183,19 @@ int main(int argc, char ** argv) {
9381183 return 1 ;
9391184 }
9401185
1186+ if (params.in_files .empty () || params.model .path .empty ()) {
1187+ LOG_ERR (" %s: an input file is required" , __func__);
1188+ print_usage (argc, argv);
1189+ return 1 ;
1190+ }
1191+
1192+ if (params.show_statistics ) {
1193+ if (!show_statistics (params)) {
1194+ return 1 ;
1195+ }
1196+ return 0 ;
1197+ }
1198+
9411199 common_init ();
9421200
9431201 const int32_t n_ctx = params.n_ctx ;
0 commit comments