@@ -317,11 +317,14 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
317317
318318static void compute_layer_statistics (const std::vector<tensor_statistics> & tstats,
319319 std::map<int , float > & layer_cossim,
320- std::map<int , float > & layer_l2_norm ,
320+ std::map<int , float > & layer_l2_dist ,
321321 const std::unordered_map<std::string, Stats> & stats_map) {
322322 struct layer_aggregation {
323- std::vector<float > curr_avg;
324- std::vector<float > prev_avg;
323+ double sum_dot_prod = 0.0 ;
324+ double sum_norm1_sq = 0.0 ;
325+ double sum_norm2_sq = 0.0 ;
326+ double sum_l2_dist_sq = 0.0 ;
327+ int n_tensors = 0 ;
325328 };
326329
327330 static const std::regex pattern (R"( blk\.(\d+)\.)" );
@@ -335,9 +338,11 @@ static void compute_layer_statistics(const std::vector<tensor_statistics> & tsta
335338 if (!std::regex_search (ts.tensor , match, pattern)) { continue ; }
336339 const int blk = std::stoi (match[1 ]);
337340 if (blk <= 0 ) { continue ; }
341+
338342 std::string prev_lyr (ts.tensor );
339343 prev_lyr.replace (match.position (1 ), match.length (1 ), std::to_string (blk - 1 ));
340344 if (tidx.find (prev_lyr) == tidx.end ()) { continue ; }
345+
341346 auto it_curr = stats_map.find (ts.tensor );
342347 auto it_prev = stats_map.find (prev_lyr);
343348 if (it_curr == stats_map.end () || it_prev == stats_map.end ()) { continue ; }
@@ -346,38 +351,45 @@ static void compute_layer_statistics(const std::vector<tensor_statistics> & tsta
346351 const auto prev_avg = compute_tensor_averages (it_prev->second );
347352 if (curr_avg.empty () || prev_avg.empty () || curr_avg.size () != prev_avg.size ()) { continue ; }
348353
349- auto & entry = agr[blk];
350- entry.curr_avg .insert (entry.curr_avg .end (), curr_avg.begin (), curr_avg.end ());
351- entry.prev_avg .insert (entry.prev_avg .end (), prev_avg.begin (), prev_avg.end ());
352- }
353-
354- for (auto & kv : agr) {
355- const auto & curr = kv.second .curr_avg ;
356- const auto & prev = kv.second .prev_avg ;
357- if (curr.size () != prev.size () || curr.empty ()) { continue ; }
358-
354+ // Compute statistics for each tensor pair individually
359355 double dot_prod = 0.0 ;
360356 double norm1_sq = 0.0 ;
361357 double norm2_sq = 0.0 ;
362358 double l2_dist_sq = 0.0 ;
363359
364- for (size_t i = 0 ; i < curr .size (); ++i) {
365- const double c_val = curr [i];
366- const double p_val = prev [i];
360+ for (size_t i = 0 ; i < curr_avg .size (); ++i) {
361+ const double c_val = curr_avg [i];
362+ const double p_val = prev_avg [i];
367363 dot_prod += c_val * p_val;
368364 norm1_sq += c_val * c_val;
369365 norm2_sq += p_val * p_val;
370366 const double diff = c_val - p_val;
371367 l2_dist_sq += diff * diff;
372368 }
373369
370+ if (norm1_sq == 0.0 && norm2_sq == 0.0 ) { continue ; }
371+
372+ // Accumulate statistics for the layer
373+ auto & entry = agr[blk];
374+ entry.sum_dot_prod += dot_prod;
375+ entry.sum_norm1_sq += norm1_sq;
376+ entry.sum_norm2_sq += norm2_sq;
377+ entry.sum_l2_dist_sq += l2_dist_sq;
378+ entry.n_tensors ++;
379+ }
380+
381+ // Compute aggregated layer statistics
382+ for (auto & kv : agr) {
383+ const auto & agg = kv.second ;
384+ if (agg.n_tensors == 0 ) { continue ; }
385+
374386 // Compute aggregated Cosine Similarity
375387 float cossim = 0 .0f ;
376- if (norm1_sq > 0 .0f && norm2_sq > 0 .0f ) {
377- cossim = dot_prod / (std::sqrt (norm1_sq ) * std::sqrt (norm2_sq ));
388+ if (agg. sum_norm1_sq > 0.0 && agg. sum_norm2_sq > 0.0 ) {
389+ cossim = agg. sum_dot_prod / (std::sqrt (agg. sum_norm1_sq ) * std::sqrt (agg. sum_norm2_sq ));
378390 cossim = std::min (cossim, 1 .0f );
379391 cossim = std::max (cossim, -1 .0f );
380- } else if (norm1_sq == 0 .0f && norm2_sq == 0 .0f ) {
392+ } else if (agg. sum_norm1_sq == 0.0 && agg. sum_norm2_sq == 0.0 ) {
381393 cossim = 1 .0f ;
382394 }
383395 layer_cossim[kv.first ] = cossim;
0 commit comments