@@ -166,44 +166,33 @@ static std::vector<float> compute_tensor_averages(const Stats & tstats) {
166166static bool compute_vector_statistics (std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
167167 const size_t n_mat = e.counts .size ();
168168 const size_t len = e.activations .empty () ? e.values .size () : e.activations .size ();
169-
170169 if (n_mat == 0 ) {
171170 LOG_ERR (" %s: there are no activations for tensor %s. The imatrix may be suboptimal\n " , __func__, name.c_str ());
172171 return false ;
173172 }
174-
175173 if (len == 0 || (len % n_mat) != 0 ) {
176174 LOG_ERR (" %s: activation size mismatch for tensor %s (len=%zu, counts=%zu)\n " , __func__, name.c_str (), len, n_mat);
177175 return false ;
178176 }
179177
180- const int row_size = (int )(len / n_mat);
181-
178+ const size_t row_size = len / n_mat;
182179 std::vector<float > activations;
183180 activations.reserve (len);
184181
185- if (e.activations .empty ()) {
186- for (size_t i = 0 ; i < n_mat; ++i) {
187- const auto c = (float )e.counts [i];
188- const size_t off = i * row_size;
189- for (int j = 0 ; j < row_size; ++j) {
190- if (c <= 0 .0f ) {
191- activations.push_back (0 .0f );
192- } else {
193- activations.push_back (e.values [off + j] / c);
194- }
195- }
182+ for (size_t i = 0 ; i < n_mat; ++i) {
183+ const auto c = (float )e.counts [i];
184+ const size_t off = i * row_size;
185+ if (c <= 0 .0f ) {
186+ activations.insert (activations.end (), row_size, 0 .0f );
187+ continue ;
196188 }
197- } else {
198- for (size_t i = 0 ; i < n_mat; ++i) {
199- const auto c = (float )e.counts [i];
200- const size_t off = i * row_size;
201- for (int j = 0 ; j < row_size; ++j) {
202- if (c <= 0 .0f ) {
203- activations.push_back (0 .0f );
204- } else {
205- activations.push_back (e.activations [off + j] / c);
206- }
189+ if (e.activations .empty ()) {
190+ for (size_t j = 0 ; j < row_size; ++j) {
191+ activations.push_back (e.values [off + j] / c); // mean-of-squares
192+ }
193+ } else {
194+ for (size_t j = 0 ; j < row_size; ++j) {
195+ activations.push_back (e.activations [off + j] / c); // mean
207196 }
208197 }
209198 }
@@ -213,59 +202,63 @@ static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, c
213202 return false ;
214203 }
215204
216- const float sum = std::accumulate (activations.begin (), activations.end (), 0 .0f );
217- const float max = * std::max_element (activations.begin (), activations.end ());
218- const float min = * std::min_element (activations.begin (), activations.end ());
219- const float mean = sum / activations.size ();
220- const float sqr_sum = std::inner_product (activations.begin (), activations.end (), activations.begin (), 0 .0f );
221- const float variance = sqr_sum / activations.size () - mean * mean;
222- const float std_deviation = std::sqrt (std::max (0 .0f , variance));
205+ double sum = 0.0 ;
206+ float vmax = activations[0 ];
207+ float vmin = activations[0 ];
208+ for (float v : activations) {
209+ sum += v;
210+ vmax = std::max (vmax, v);
211+ vmin = std::min (vmin, v);
212+ }
213+
214+ const auto mean = (float )(sum / (double )activations.size ());
215+ double sqr_sum = 0.0 ;
216+ for (const float v : activations) { sqr_sum += (double )v * (double )v; }
217+ double variance = sqr_sum / (double )activations.size () - (double )mean * (double )mean;
218+ if (variance < 0.0 ) { variance = 0.0 ; }
219+ const float std_deviation = std::sqrt ((float )variance);
223220
224221 float entropy = 0 .0f ;
225222 if (e.activations .empty ()) {
226- // classic entropy on normalized activations distribution
227- if (sum > 0 .0f ) {
228- for (const auto act : activations) {
229- const float p = act / sum;
230- if (p > 0 .0f ) { entropy -= p * std::log2 (p); }
223+ double energy_sum = 0.0 ;
224+ for (float v : activations) { energy_sum += (double )std::max (0 .0f , v); }
225+ if (energy_sum > 0.0 ) {
226+ for (const float v : activations) {
227+ const double p = std::max (0.0 , (double )v) / energy_sum;
228+ if (p > 0.0 ) { entropy -= (float )(p * std::log2 (p)); }
231229 }
232230 }
233231 } else {
234- // entropy on normalized squared weights
235- float div = 0 .0f ;
236- std::vector<float > weights (activations.size ());
237- for (size_t i = 0 ; i < activations.size (); ++i) {
238- const float w = activations[i] * activations[i];
239- weights[i] = w;
240- div += w;
241- }
242- if (div > 0 .0f ) {
243- for (const float w : weights) {
244- const float p = w / div;
245- if (p > 0 .0f ) { entropy -= p * std::log2 (p); }
232+ double energy_sum = 0.0 ;
233+ for (const float v : activations) { energy_sum += (double )v * (double )v; }
234+ if (energy_sum > 0.0 ) {
235+ for (const float v : activations) {
236+ const double p = (double )v * (double )v / energy_sum;
237+ if (p > 0.0 ) { entropy -= (float )(p * std::log2 (p)); }
246238 }
247239 }
248240 }
249241
250- float zd_score = 0 .0f ;
242+ // ZD score: fraction with |z| > 1
243+ double zd_count = 0.0 ;
251244 if (std_deviation > 0 .0f ) {
252- for (const auto act : activations) {
253- const float z = (act - mean) / std_deviation;
254- if (std::fabs (z) > 1 .0f ) { zd_score++ ; }
245+ for (const float v : activations) {
246+ const float z = (v - mean) / std_deviation;
247+ if (std::fabs (z) > 1 .0f ) { zd_count += 1.0 ; }
255248 }
256249 }
257250
258251 auto & ts = tstats.emplace_back ();
259252 ts.tensor = name;
260253 ts.stats = e;
261- ts.sum_values = sum;
254+ ts.sum_values = ( float ) sum;
262255 ts.mean_values = mean;
263- ts.max_values = max ;
264- ts.min_values = min ;
265- ts.elements = static_cast < int >( activations.size () );
256+ ts.max_values = vmax ;
257+ ts.min_values = vmin ;
258+ ts.elements = ( int ) activations.size ();
266259 ts.std_deviation = std_deviation;
267260 ts.entropy = entropy;
268- ts.zd_score = zd_score / ts.elements ;
261+ ts.zd_score = ts. elements > 0 ? ( float )(zd_count / ( double ) ts.elements ) : 0 . 0f ;
269262
270263 return e.activations .empty ();
271264}
0 commit comments