Skip to content

Commit 755c1ef

Browse files
committed
Add Cosine Similarity
1 parent 91d48da commit 755c1ef

File tree

1 file changed

+78
-53
lines changed

1 file changed

+78
-53
lines changed

examples/imatrix/imatrix.cpp

Lines changed: 78 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
#include "arg.h"
2+
#include "common.h"
3+
#include "llama-impl.h"
4+
#include "llama.h"
5+
#include "log.h"
6+
17
#include <algorithm>
28
#include <chrono>
39
#include <cmath>
@@ -10,11 +16,7 @@
1016
#include <thread>
1117
#include <unordered_map>
1218
#include <vector>
13-
14-
#include "arg.h"
15-
#include "common.h"
16-
#include "llama.h"
17-
#include "log.h"
19+
#include <regex>
1820

1921
#if defined(_MSC_VER)
2022
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -36,16 +38,17 @@ struct Stats {
3638

3739
struct tensor_statistics { // Per-tensor summary filled in by IMatrixCollector::load_imatrix when a statistics vector is passed in.
3840
std::string tensor; // Tensor name, assigned from name_as_vec.data() in load_imatrix.
39-
float total = 0;
40-
float mean = 0;
41-
float max = 0;
42-
float min = 0;
41+
Stats stats; // Copy of the collector entry for this tensor; its `values` are the input to the cosine-similarity computation.
42+
float total_bias = 0; // Sum of the per-element activations, where activation[i] = values[i] / counts[i].
43+
float mean_bias = 0; // total_bias divided by the number of activations.
44+
float max_bias = 0; // Largest activation.
45+
float min_bias = 0; // Smallest activation.
46+
int elements = 0; // Number of activations (activations.size()).
4347
float stddev = 0; // sqrt(sum(act^2) / n - mean^2).
44-
float cv = 0;
45-
float zd = 0;
4648
float active = 0; // 1 - (inactive / n); an element counts as inactive when fabs(act) <= min + 0.5 * min.
4749
float entropy = 0; // -sum(p * log2(p)) with p = act / total, accumulated only for p > 0 and only when total > 0.
48-
int elements = 0;
50+
float zd = 0; // Fraction of elements whose z-score (act - mean) / stddev exceeds 1.
51+
float cossim = 0; // Cosine similarity between stats.values and those of the same-named tensor in block (blk - 1); left at 0 when the name has no "blk.N." part or no such tensor is found.
4952
};
5053

5154
class IMatrixCollector {
@@ -332,7 +335,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
332335
LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
333336
}
334337

335-
bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * ts) {
338+
bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * tstats) {
336339
std::ifstream in(fname, std::ios::binary);
337340
if (!in) {
338341
LOG_ERR("%s: failed to open %s\n",__func__, fname);
@@ -381,58 +384,78 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_stati
381384
// Recreate the state as expected by save_imatrix(), and correct for weighted sum.
382385
std::vector<float> activations;
383386
activations.reserve(nval);
384-
385387
for (int i = 0; i < nval; i++) {
386388
e.values[i] += tmp[i];
387389
e.counts[i] += ncall;
388-
activations.push_back(e.values[i] / static_cast<float>(e.counts[i]));
390+
activations.push_back(e.values[i] / e.counts[i]);
389391
}
390392
e.ncall += ncall;
391393

392-
if (ts) {
393-
float total_bias = std::accumulate(activations.begin(), activations.end(), 0.0f);
394-
float max_bias = * std::max_element(activations.begin(), activations.end());
395-
float min_bias = * std::min_element(activations.begin(), activations.end());
396-
float mean_bias = total_bias / activations.size();
397-
float sq_total_bias = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
398-
float dev = std::sqrt((sq_total_bias / activations.size()) - (mean_bias * mean_bias));
399-
float rmsd = mean_bias > 0.0f ? dev / mean_bias : 0.0f;
400-
401-
float threshold = 1e-6f;
402-
int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) < threshold; });
403-
float active_ratio = 1 - (static_cast<float>(inactive_count) / activations.size());
404-
405-
float ent = 0.0f;
406-
if (total_bias > 0) {
394+
if (tstats) {
395+
float total = std::accumulate(activations.begin(), activations.end(), 0.0f);
396+
float max = * std::max_element(activations.begin(), activations.end());
397+
float min = * std::min_element(activations.begin(), activations.end());
398+
float mean = total / activations.size();
399+
float sq_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
400+
float dev = std::sqrt((sq_total / activations.size()) - (mean * mean));
401+
402+
float threshold = min + min * 0.5f;
403+
int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) <= threshold; });
404+
float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();
405+
406+
float ent = 0;
407+
if (total > 0) {
407408
for (auto act : activations) {
408-
if (float p = act / total_bias; p > 0) {
409+
if (float p = act / total; p > 0) {
409410
ent -= p* std::log2(p);
410411
}
411412
}
412413
}
413414

414415
int z_score = 0;
415416
for (auto act : activations) {
416-
if (float p = (act - mean_bias) / dev; p > 1) {
417+
if (float p = (act - mean) / dev; p > 1) {
417418
z_score++;
418419
}
419420
}
420421

421-
ts->emplace_back();
422-
auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i];
423-
tensor = name_as_vec.data();
424-
total = total_bias;
425-
mean = mean_bias;
426-
max = max_bias;
427-
min = min_bias;
428-
stddev = dev;
429-
cv = rmsd;
430-
active = active_ratio;
431-
entropy = ent;
432-
elements = static_cast<int>(activations.size());
433-
zd = static_cast<float>(z_score) / static_cast<float>(elements);
422+
tstats->emplace_back();
423+
auto & ts = (*tstats)[i];
424+
ts.tensor = name_as_vec.data();
425+
ts.stats = e;
426+
ts.total_bias = total;
427+
ts.mean_bias = mean;
428+
ts.max_bias = max;
429+
ts.min_bias = min;
430+
ts.elements = static_cast<int>(activations.size());
431+
ts.stddev = dev;
432+
ts.active = active_ratio;
433+
ts.entropy = ent;
434+
ts.zd = static_cast<float>(z_score) / ts.elements;
435+
}
436+
}
437+
438+
if (tstats) {
439+
static const std::regex pattern(R"(blk\.(\d+)\.)");
440+
for (auto & ts : *tstats) {
441+
if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
442+
const int blk = std::stoi(match[1]);
443+
std::string tname(ts.tensor);
444+
tname.replace(match.position(1), match.length(1), std::to_string(blk-1));
445+
auto prev = std::find_if(tstats->begin(), tstats->end(), [tname](const tensor_statistics & t) { return t.tensor == tname; });
446+
if (prev != tstats->end()) {
447+
const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), prev->stats.values.begin(), 0.0f);
448+
const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), ts.stats.values.begin(), 0.0f));
449+
const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(), prev->stats.values.begin(), 0.0f));
450+
const float cs = dp / (curr_mag * prev_mag);
451+
ts.cossim = cs;
452+
}
453+
} else {
454+
ts.cossim = 0;
455+
}
434456
}
435457
}
458+
436459
return true;
437460
}
438461

@@ -700,20 +723,22 @@ int main(int argc, char ** argv) {
700723
std::string layer, name_a, name_b;;
701724
process_tensor_name(a.tensor, layer, name_a);
702725
process_tensor_name(b.tensor, layer, name_b);
703-
return name_a < name_b || (name_a == name_b && a.total > b.total);
726+
return name_a < name_b || (name_a == name_b && a.total_bias > b.total_bias);
704727
}
705728
};
706729
std::sort(ts.begin(), ts.end(), tensor_comparer());
707730

708731
LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
709-
LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n",
710-
"Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score");
711-
LOG_INF("==========================================================================================================================================================================\n");
712-
for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) {
732+
LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
733+
" Layer", " Tensor", " Σ(Bias)", " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD", " CosSim");
734+
LOG_INF("=========================================================================================================================================================================\n");
735+
for (const auto & tstat : ts) {
713736
std::string layer, name;
714-
process_tensor_name(tensor, layer, name);
715-
LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%9.2f%%\n",
716-
layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 100.0f * zd);
737+
process_tensor_name(tstat.tensor, layer, name);
738+
LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
739+
layer.c_str(), name.c_str(), tstat.total_bias, tstat.min_bias, tstat.max_bias, tstat.mean_bias, tstat.stddev,
740+
tstat.active * 100.0f, tstat.elements, tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)),
741+
100.0f * tstat.zd, tstat.cossim);
717742
}
718743
LOG_INF("\n");
719744

0 commit comments

Comments
 (0)