1+ #include " arg.h"
2+ #include " common.h"
3+ #include " llama-impl.h"
4+ #include " llama.h"
5+ #include " log.h"
6+
17#include < algorithm>
28#include < chrono>
39#include < cmath>
1016#include < thread>
1117#include < unordered_map>
1218#include < vector>
13-
14- #include " arg.h"
15- #include " common.h"
16- #include " llama.h"
17- #include " log.h"
19+ #include < regex>
1820
1921#if defined(_MSC_VER)
2022#pragma warning(disable: 4244 4267) // possible loss of data
@@ -36,16 +38,17 @@ struct Stats {
3638
// Per-tensor summary computed by IMatrixCollector::load_imatrix() when a statistics
// vector is supplied, and printed as one table row per tensor by main().
// The "*_bias" figures are taken over the per-element averages values[i] / counts[i].
3739struct tensor_statistics {
3840 std::string tensor; // tensor name; main() splits it into layer + name for display
39- float total = 0 ;
40- float mean = 0 ;
41- float max = 0 ;
42- float min = 0 ;
41+ Stats stats; // copy of the accumulated entry; its `values` feed the cosine similarity
42+ float total_bias = 0 ; // sum of the per-element averages
43+ float mean_bias = 0 ; // total_bias divided by the number of elements
44+ float max_bias = 0 ; // largest per-element average
45+ float min_bias = 0 ; // smallest per-element average
46+ int elements = 0 ; // number of per-element averages (activations.size())
4347 float stddev = 0 ; // sqrt(mean of squares - mean_bias^2)
44- float cv = 0 ;
45- float zd = 0 ;
4648 float active = 0 ; // 1 - fraction of elements with |v| <= 1.5 * min_bias
4749 float entropy = 0 ; // -sum(p * log2(p)) with p = element / total_bias; 0 if total_bias <= 0
48- int elements = 0 ;
50+ float zd = 0 ; // fraction of elements whose z-score (v - mean_bias) / stddev exceeds 1
51+ float cossim = 0 ; // cosine similarity of stats.values vs. the same-named tensor in block N-1; stays 0 if none
4952};
5053
5154class IMatrixCollector {
@@ -332,7 +335,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
332335 LOG_DBGV (1 , " %s: stored collected data after %d chunks in %s\n " , __func__, m_last_call, fname.c_str ());
333336}
334337
335- bool IMatrixCollector::load_imatrix (const char * fname, std::vector<tensor_statistics> * ts ) {
338+ bool IMatrixCollector::load_imatrix (const char * fname, std::vector<tensor_statistics> * tstats ) {
336339 std::ifstream in (fname, std::ios::binary);
337340 if (!in) {
338341 LOG_ERR (" %s: failed to open %s\n " ,__func__, fname);
@@ -381,58 +384,78 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_stati
381384 // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
382385 std::vector<float > activations;
383386 activations.reserve (nval);
384-
385387 for (int i = 0 ; i < nval; i++) {
386388 e.values [i] += tmp[i];
387389 e.counts [i] += ncall;
388- activations.push_back (e.values [i] / static_cast < float >( e.counts [i]) );
390+ activations.push_back (e.values [i] / e.counts [i]);
389391 }
390392 e.ncall += ncall;
391393
392- if (ts) {
393- float total_bias = std::accumulate (activations.begin (), activations.end (), 0 .0f );
394- float max_bias = * std::max_element (activations.begin (), activations.end ());
395- float min_bias = * std::min_element (activations.begin (), activations.end ());
396- float mean_bias = total_bias / activations.size ();
397- float sq_total_bias = std::inner_product (activations.begin (), activations.end (), activations.begin (), 0 .0f );
398- float dev = std::sqrt ((sq_total_bias / activations.size ()) - (mean_bias * mean_bias));
399- float rmsd = mean_bias > 0 .0f ? dev / mean_bias : 0 .0f ;
400-
401- float threshold = 1e-6f ;
402- int inactive_count = std::count_if (activations.begin (), activations.end (), [threshold](const float v) { return fabs (v) < threshold; });
403- float active_ratio = 1 - (static_cast <float >(inactive_count) / activations.size ());
404-
405- float ent = 0 .0f ;
406- if (total_bias > 0 ) {
394+ if (tstats) {
395+ float total = std::accumulate (activations.begin (), activations.end (), 0 .0f );
396+ float max = * std::max_element (activations.begin (), activations.end ());
397+ float min = * std::min_element (activations.begin (), activations.end ());
398+ float mean = total / activations.size ();
399+ float sq_total = std::inner_product (activations.begin (), activations.end (), activations.begin (), 0 .0f );
400+ float dev = std::sqrt ((sq_total / activations.size ()) - (mean * mean));
401+
402+ float threshold = min + min * 0 .5f ;
403+ int inactive_count = std::count_if (activations.begin (), activations.end (), [threshold](const float v) { return fabs (v) <= threshold; });
404+ float active_ratio = 1 - static_cast <float >(inactive_count) / activations.size ();
405+
406+ float ent = 0 ;
407+ if (total > 0 ) {
407408 for (auto act : activations) {
408- if (float p = act / total_bias ; p > 0 ) {
409+ if (float p = act / total ; p > 0 ) {
409410 ent -= p* std::log2 (p);
410411 }
411412 }
412413 }
413414
414415 int z_score = 0 ;
415416 for (auto act : activations) {
416- if (float p = (act - mean_bias ) / dev; p > 1 ) {
417+ if (float p = (act - mean ) / dev; p > 1 ) {
417418 z_score++;
418419 }
419420 }
420421
421- ts->emplace_back ();
422- auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i];
423- tensor = name_as_vec.data ();
424- total = total_bias;
425- mean = mean_bias;
426- max = max_bias;
427- min = min_bias;
428- stddev = dev;
429- cv = rmsd;
430- active = active_ratio;
431- entropy = ent;
432- elements = static_cast <int >(activations.size ());
433- zd = static_cast <float >(z_score) / static_cast <float >(elements);
422+ tstats->emplace_back ();
423+ auto & ts = (*tstats)[i];
424+ ts.tensor = name_as_vec.data ();
425+ ts.stats = e;
426+ ts.total_bias = total;
427+ ts.mean_bias = mean;
428+ ts.max_bias = max;
429+ ts.min_bias = min;
430+ ts.elements = static_cast <int >(activations.size ());
431+ ts.stddev = dev;
432+ ts.active = active_ratio;
433+ ts.entropy = ent;
434+ ts.zd = static_cast <float >(z_score) / ts.elements ;
435+ }
436+ }
437+
438+ if (tstats) {
439+ static const std::regex pattern (R"( blk\.(\d+)\.)" );
440+ for (auto & ts : *tstats) {
441+ if (std::smatch match; std::regex_search (ts.tensor , match, pattern)) {
442+ const int blk = std::stoi (match[1 ]);
443+ std::string tname (ts.tensor );
444+ tname.replace (match.position (1 ), match.length (1 ), std::to_string (blk-1 ));
445+ auto prev = std::find_if (tstats->begin (), tstats->end (), [tname](const tensor_statistics & t) { return t.tensor == tname; });
446+ if (prev != tstats->end ()) {
447+ const float dp = std::inner_product (ts.stats .values .begin (), ts.stats .values .end (), prev->stats .values .begin (), 0 .0f );
448+ const float curr_mag = std::sqrt (std::inner_product (ts.stats .values .begin (), ts.stats .values .end (), ts.stats .values .begin (), 0 .0f ));
449+ const float prev_mag = std::sqrt (std::inner_product (prev->stats .values .begin (), prev->stats .values .end (), prev->stats .values .begin (), 0 .0f ));
450+ const float cs = dp / (curr_mag * prev_mag);
451+ ts.cossim = cs;
452+ }
453+ } else {
454+ ts.cossim = 0 ;
455+ }
434456 }
435457 }
458+
436459 return true ;
437460}
438461
@@ -700,20 +723,22 @@ int main(int argc, char ** argv) {
700723 std::string layer, name_a, name_b;;
701724 process_tensor_name (a.tensor , layer, name_a);
702725 process_tensor_name (b.tensor , layer, name_b);
703- return name_a < name_b || (name_a == name_b && a.total > b.total );
726+ return name_a < name_b || (name_a == name_b && a.total_bias > b.total_bias );
704727 }
705728 };
706729 std::sort (ts.begin (), ts.end (), tensor_comparer ());
707730
708731 LOG_INF (" \n Computing statistics for %s (%d tensors)\n " , params.in_files [0 ].c_str (), static_cast <int >(ts.size ()));
709- LOG_INF (" \n %5s \t %-20s \t %10s \t %7s \t %12s \t %9s \t %10s \t %9s \t %6s \t %12s \t %7s \t %10s \n " ,
710- " Layer" , " Tensor" , " Σ(Bias)" , " Min" , " Max" , " μ" , " σ" , " % Active" , " N" , " Entropy" , " E (norm)" , " ZD Score " );
711- LOG_INF (" ========================================================================================================================================================================== \n " );
712- for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) {
732+ LOG_INF (" \n %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \t %s \n " ,
733+ " Layer" , " Tensor" , " Σ(Bias)" , " Min" , " Max" , " μ" , " σ" , " % Active" , " N" , " Entropy" , " E (norm)" , " ZD" , " CosSim " );
734+ LOG_INF (" =========================================================================================================================================================================\n " );
735+ for (const auto & tstat : ts) {
713736 std::string layer, name;
714- process_tensor_name (tensor, layer, name);
715- LOG_INF (" %5s\t %-20s\t %10.2f\t %7.4f\t %12.4f\t %8.4f\t %9.4f\t %8.2f%%\t %6d\t %12.4f\t %7.2f%%\t %9.2f%%\n " ,
716- layer.c_str (), name.c_str (), total, min, max, mean, stddev, active * 100 .0f , elements, entropy, 100 .0f * (entropy / std::log2 (elements)), 100 .0f * zd);
737+ process_tensor_name (tstat.tensor , layer, name);
738+ LOG_INF (" %5s\t %-20s\t %10.2f\t %8.4f\t %11.4f\t %6.2f\t %6.2f\t %8.2f%%\t %6d\t %10.4f\t %6.2f%%\t %10.2f%%\t %8.4f\n " ,
739+ layer.c_str (), name.c_str (), tstat.total_bias , tstat.min_bias , tstat.max_bias , tstat.mean_bias , tstat.stddev ,
740+ tstat.active * 100 .0f , tstat.elements , tstat.entropy , 100 .0f * (tstat.entropy / std::log2 (tstat.elements )),
741+ 100 .0f * tstat.zd , tstat.cossim );
717742 }
718743 LOG_INF (" \n " );
719744
0 commit comments