Skip to content

Commit ce046dc

Browse files
committed
Save statistics to imatrix
1 parent 7d8819f commit ce046dc

File tree

1 file changed

+41
-2
lines changed

1 file changed

+41
-2
lines changed

tools/imatrix/imatrix.cpp

Lines changed: 41 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -740,11 +740,27 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
740740
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
741741
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
742742
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
743+
data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * 4, GGML_MEM_ALIGN);
743744
}
744745

745746
// deterministic tensor name order
746747
std::sort(to_store.begin(), to_store.end());
747748

749+
// Compute per-tensor statistics (L2 Dist, CosSim, ZD Score) to store alongside sums; ECS is derived from L2 Dist and CosSim when the stats tensor is written
750+
std::vector<tensor_statistics> tstats;
751+
tstats.reserve(m_stats.size());
752+
bool legacy_mode = true;
753+
for (const auto & kv : m_stats) {
754+
const bool is_legacy = compute_vector_statistics(tstats, kv.first, kv.second);
755+
legacy_mode = legacy_mode && is_legacy;
756+
}
757+
if (!tstats.empty()) { compute_tensor_statistics(tstats); }
758+
759+
// index by tensor name
760+
std::unordered_map<std::string, const tensor_statistics *> tstat_index;
761+
tstat_index.reserve(tstats.size());
762+
for (const auto & ts : tstats) { tstat_index[ts.tensor] = &ts; }
763+
748764
struct ggml_init_params params = {
749765
/* .mem_size = */ data_size,
750766
/* .mem_buffer = */ NULL,
@@ -801,6 +817,29 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
801817
gguf_add_tensor(ctx_gguf, in_sum);
802818
}
803819
}
820+
821+
// Store per-tensor statistics as a small 1D tensor: [ECS, L2 Dist, CosSim, ZD Score]
822+
{
823+
float l2 = 0.0f;
824+
float cs = 0.0f;
825+
float zd = 0.0f;
826+
float ecs = 0.0f;
827+
auto it_ts = tstat_index.find(name);
828+
if (it_ts != tstat_index.end() && it_ts->second != nullptr) {
829+
l2 = it_ts->second->l2_dist;
830+
cs = it_ts->second->cossim;
831+
zd = it_ts->second->zd_score;
832+
ecs = 100.0f * (1.0f - std::exp(-0.01f * l2) * std::pow(std::fabs(cs), 10.0f));
833+
}
834+
835+
struct ggml_tensor * stats_t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 4);
836+
ggml_format_name(stats_t, "%s.stats", name.c_str());
837+
((float *)stats_t->data)[0] = ecs;
838+
((float *)stats_t->data)[1] = l2;
839+
((float *)stats_t->data)[2] = cs;
840+
((float *)stats_t->data)[3] = zd;
841+
gguf_add_tensor(ctx_gguf, stats_t);
842+
}
804843
}
805844

806845
gguf_write_to_file(ctx_gguf, fname.c_str(), false);
@@ -1367,7 +1406,7 @@ static bool show_statistics(const common_params & params) {
13671406
}
13681407

13691408
const float h_norm = tstat.elements > 1 ? 100.0f * (tstat.entropy / std::log2((float) tstat.elements)) : 0.0f;
1370-
const float ecs = 100.0f * std::exp(-0.01f * tstat.l2_dist) * std::pow(std::fabs(tstat.cossim), 10.0f); // Euclidean-Cosine score
1409+
const float ecs = 100.0f * (1.0f - std::exp(-0.01f * tstat.l2_dist) * std::pow(std::fabs(tstat.cossim), 10.0f)); // Euclidean-Cosine score
13711410

13721411
LOG_INF("%5s\t%-20s\t%11.4f\t%10.4f\t%10.4f\t%8.4f\t%8.4f\t%7d\t%10.2f%%\t%10.4f\t%6.2f%%\t%10.4f\n",
13731412
layer.c_str(),
@@ -1432,7 +1471,7 @@ static bool show_statistics(const common_params & params) {
14321471
layer_l2n,
14331472
100.0f * stats.layer_zd / stats.n,
14341473
layer_cs,
1435-
100.0f * std::exp(-0.01f * layer_l2n) * std::pow(std::fabs(layer_cs), 10.0f));
1474+
100.0f * (1.0f - std::exp(-0.01f * layer_l2n) * std::pow(std::fabs(layer_cs), 10.0f))); // Euclidean-Cosine score
14361475
}
14371476
}
14381477
LOG_INF("\n");

0 commit comments

Comments
 (0)