1+ #include " ../../src/llama-impl.h"
12#include " arg.h"
23#include " common.h"
3- #include " log.h"
4- #include " llama.h"
54#include " gguf.h"
5+ #include " llama.h"
6+ #include " log.h"
67
78#include < algorithm>
89#include < chrono>
910#include < cmath>
1011#include < cstdio>
1112#include < cstring>
1213#include < ctime>
13- #include < thread>
14- #include < mutex>
15- #include < vector>
1614#include < fstream>
17- #include < unordered_map>
1815#include < map>
19- #include < regex >
16+ #include < mutex >
2017#include < numeric>
18+ #include < regex>
19+ #include < thread>
20+ #include < unordered_map>
21+ #include < vector>
2122
2223#if defined(_MSC_VER)
2324#pragma warning(disable: 4244 4267) // possible loss of data
@@ -43,7 +44,6 @@ struct Stats {
4344 std::vector<int64_t > counts;
4445};
4546
46- // ToDo: rename sqract variables to be more generic like 'values'
4747struct tensor_statistics {
4848 std::string tensor;
4949 Stats stats;
@@ -57,6 +57,7 @@ struct tensor_statistics {
5757 float entropy = 0 .0f ;
5858 float zd_score = 0 .0f ;
5959 float cossim = 0 .0f ;
60+ float l2_norm = 0 .0f ;
6061};
6162
6263class IMatrixCollector {
@@ -253,6 +254,7 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
253254 }
254255 return v;
255256 };
257+
256258 // compute the cosine similarity between the same tensors in consecutive layers
257259 for (auto & ts : tstats) {
258260 ts.cossim = 0 ;
@@ -278,6 +280,30 @@ static void compute_layer_statistics(std::vector<tensor_statistics> & tstats) {
278280 }
279281 }
280282 }
283+
284+ // compute the L2 norm of the difference (Euclidean distance) between the same tensors in consecutive layers
285+ for (auto & ts : tstats) {
286+ ts.l2_norm = 0 .0f ;
287+ if (ts.stats .in_sum .empty ()) continue ;
288+
289+ if (std::smatch match; std::regex_search (ts.tensor , match, pattern)) {
290+ const int blk = std::stoi (match[1 ]);
291+ if (blk <= 0 ) continue ;
292+ std::string tname (ts.tensor );
293+ tname.replace (match.position (1 ), match.length (1 ), std::to_string (blk - 1 ));
294+ auto prev = std::find_if (tstats.begin (), tstats.end (),
295+ [tname](const tensor_statistics & t) { return t.tensor == tname; });
296+ if (prev == tstats.end ()) continue ;
297+ const auto cur_avg = build_avg (ts.stats );
298+ const auto prev_avg = build_avg (prev->stats );
299+ if (cur_avg.empty () || prev_avg.empty () || cur_avg.size () != prev_avg.size ()) continue ;
300+
301+ float dist = 0.0 ;
302+ for (size_t i = 0 ; i < cur_avg.size (); ++i) {
303+ const float act = cur_avg[i] - prev_avg[i];
304+ dist += act * act;
305+ }
306+ ts.l2_norm = std::sqrt (dist);
281307 }
282308 }
283309}
0 commit comments