@@ -38,10 +38,12 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
 static const char * const LLM_KV_IMATRIX_CHUNK_SIZE  = "imatrix.chunk_size";
 
 struct Stats {
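+    // per-element running sum of raw activations; complements 'values',
+    // which accumulates the sum of squared activations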
+    std::vector<float>   activations;
     std::vector<float>   values;
     std::vector<int64_t> counts;
 };
 
+// TODO: rename sqract variables to be more generic like 'values'
 struct tensor_statistics {
     std::string tensor;
     Stats stats;
@@ -139,14 +141,28 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
     const int row_size = e.values.size() / n_mat;
 
     std::vector<float> activations;
-    activations.reserve(e.values.size());
 
-    for (int i = 0; i < n_mat; ++i) {
-        for (int j = 0; j < row_size; ++j) {
-            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
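+    // use the raw activation sums when available; otherwise fall back to the
+    // squared sums in 'values' (e.g. data collected before activations existed)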
+    if (e.activations.empty()) {
+        activations.reserve(e.values.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+            }
+        }
+    } else {
+        activations.reserve(e.activations.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
+            }
         }
     }
 
+    // TODO: rename act_ variables to be more generic like 'values'
     const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
     const float act_max   = *std::max_element(activations.begin(), activations.end());
     const float act_min   = *std::min_element(activations.begin(), activations.end());
@@ -282,6 +298,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             e.counts.resize(n_as, e.counts[0]);
         }
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);
             e.counts.resize(n_as, 0);
         }
@@ -313,6 +330,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 e.counts[ex]++;
 
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
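+                    // also accumulate the raw activation sum, alongside the sum of squares below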
+                    e.activations[e_start + j] += x[j];
                     e.values[e_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[e_start + j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
@@ -338,6 +356,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         const int64_t n_mat = src1->ne[2] * src1->ne[3];
 
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0] * n_mat, 0);
             e.values.resize(src1->ne[0] * n_mat, 0);
             e.counts.resize(n_mat, 0);
         }
@@ -359,6 +378,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->nb[3]);
                 e.counts[mat_id]++;
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
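+                    // accumulate the raw activation sum here as well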
+                    e.activations[mat_start + j] += x[j];
                     e.values[mat_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
@@ -532,6 +552,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         }
 
         to_store.push_back(kv.first);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
     }
@@ -584,6 +605,16 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
 
         gguf_add_tensor(ctx_gguf, in_sum2);
         gguf_add_tensor(ctx_gguf, counts);
+
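+        // optionally store the raw activation sums as a '<name>.in_sum' tensor, next to in_sum2 and counts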
+        if (!stat.activations.empty()) {
+            const int32_t nact = (int32_t) stat.activations.size();
+            struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
+            ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // TODO: consider a better name. 'in_act' maybe?
+            for (int32_t j = 0; j < nact; ++j) {
+                ((float *) in_sum->data)[j] = (float) stat.activations[j];
+            }
+            gguf_add_tensor(ctx_gguf, in_sum);
+        }
     }
 }
 
@@ -722,14 +753,15 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         }
     }
 
+    const std::string in_sum_suffix{".in_sum"};
     const std::string in_sum2_suffix{".in_sum2"};
     const std::string counts_suffix{".counts"};
 
     // Could re-use m_stats instead, but this allows
     // checking for completeness of *each* loaded imatrix file
     // and also makes it easier to re-use a similar implementation in quantize.cpp
     // Using an ordered map to get a deterministic iteration order.
-    std::map<std::string, std::pair<struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
+    std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
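+    // tuple slots: 0 = in_sum2 (squared sums), 1 = counts, 2 = in_sum (raw sums, optional)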
 
     for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
         std::string name = cur->name;
@@ -738,19 +770,24 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
 
         if (string_remove_suffix(name, in_sum2_suffix)) {
             // in_sum2
-            sums_counts_for[std::move(name)].first = cur;
+            std::get<0>(sums_counts_for[std::move(name)]) = cur;
         } else if (string_remove_suffix(name, counts_suffix)) {
             // counts
-            sums_counts_for[std::move(name)].second = cur;
-        } else {
+            std::get<1>(sums_counts_for[std::move(name)]) = cur;
+        } else if (string_remove_suffix(name, in_sum_suffix)) {
+            // in_sum
+            std::get<2>(sums_counts_for[std::move(name)]) = cur;
+        } else {
             // ignore other tensors
         }
     }
 
     for (const auto & sc : sums_counts_for) {
         const std::string & name = sc.first;
-        const struct ggml_tensor * in_sum2 = sc.second.first;
-        const struct ggml_tensor * counts  = sc.second.second;
+        const struct ggml_tensor * in_sum2 = std::get<0>(sc.second);
+        const struct ggml_tensor * counts  = std::get<1>(sc.second);
+        const struct ggml_tensor * in_sum  = std::get<2>(sc.second);
 
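+        // in_sum is optional: files written before activation sums were added only carry in_sum2 and counts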
         if (!in_sum2 || !counts) {
             LOG_ERR("%s: mismatched sums and counts for %s\n", __func__, name.c_str());
@@ -764,6 +801,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         int64_t nval = ggml_nelements(in_sum2);
         if (e.values.empty()) {
             e.values.resize(nval, 0.0f);
+            e.activations.resize(nval, 0.0f);
         } else if ((size_t) nval != e.values.size()) {
             LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
             gguf_free(ctx_gguf);
@@ -791,6 +829,12 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         for (int64_t j = 0; j < ncounts; j++) {
             e.counts[j] += std::lround(((const float *) counts->data)[j]);
         }
+        // in_sum may be absent from older imatrix files; skip it safely instead of dereferencing null
+        if (in_sum && in_sum->data != nullptr) {
+            for (int64_t j = 0; j < nval; j++) {
+                e.activations[j] += ((const float *) in_sum->data)[j];
+            }
+        }
     }
 
     // TODO: extract into its own method; this is also used by the legacy format