
Commit 09bc7c2

Use activations to calculate the stats
1 parent 11dd5a4 commit 09bc7c2

File tree

1 file changed: 54 additions, 10 deletions


tools/imatrix/imatrix.cpp

Lines changed: 54 additions & 10 deletions
@@ -38,10 +38,12 @@ static const char * const LLM_KV_IMATRIX_CHUNK_COUNT = "imatrix.chunk_count";
 static const char * const LLM_KV_IMATRIX_CHUNK_SIZE = "imatrix.chunk_size";
 
 struct Stats {
+    std::vector<float> activations;
     std::vector<float> values;
     std::vector<int64_t> counts;
 };
 
+//ToDo: rename sqract variables to be more generic like 'values'
 struct tensor_statistics {
     std::string tensor;
     Stats stats;
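
Note (not part of the commit): Stats now tracks three things per weight tensor: a running sum of the raw input activations (new), the running sum of their squares, and a per-matrix row count. A minimal sketch of how the three vectors relate, assuming the row_size-columns-per-matrix layout used elsewhere in this file; col_mean and col_mean_sq are hypothetical helpers, not code from imatrix.cpp:

#include <cstdint>
#include <vector>

struct Stats {
    std::vector<float>   activations; // per-column sum of x[j]        (added by this commit)
    std::vector<float>   values;      // per-column sum of x[j] * x[j] (pre-existing)
    std::vector<int64_t> counts;      // rows accumulated per matrix   (pre-existing)
};

// Hypothetical helpers: per-column mean and mean-square for matrix i, column j.
static float col_mean(const Stats & s, int row_size, int i, int j) {
    return s.activations[i*row_size + j] / s.counts[i];
}
static float col_mean_sq(const Stats & s, int row_size, int i, int j) {
    return s.values[i*row_size + j] / s.counts[i];
}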
@@ -139,14 +141,28 @@ static void compute_statistics(std::vector<tensor_statistics> & tstats, const st
     const int row_size = e.values.size() / n_mat;
 
     std::vector<float> activations;
-    activations.reserve(e.values.size());
 
-    for (int i = 0; i < n_mat; ++i) {
-        for (int j = 0; j < row_size; ++j) {
-            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+    if (e.activations.empty()) {
+        activations.reserve(e.values.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.values[i*row_size + j] / e.counts[i]);
+            }
+        }
+    } else {
+        activations.reserve(e.activations.size());
+
+        for (int i = 0; i < n_mat; ++i) {
+            for (int j = 0; j < row_size; ++j) {
+                activations.push_back(e.activations[i*row_size + j] / e.counts[i]);
+            }
         }
     }
 
+
+
+    //ToDo: rename act_ variables to be more generic like 'values'
     const float act_total = std::accumulate(activations.begin(), activations.end(), 0.0f);
     const float act_max = *std::max_element(activations.begin(), activations.end());
     const float act_min = *std::min_element(activations.begin(), activations.end());
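
Note (not part of the commit): the branch above means the reported statistics are now computed from the mean of the raw activations (sum / count) whenever e.activations has been filled, and only fall back to the previous behaviour, the mean of the squared activations (sum of squares / count), for imatrix data collected before this change. A toy comparison with made-up numbers:

#include <cstdio>

int main() {
    // one column observed over n = 4 rows
    const float x[4] = { 1.0f, -2.0f, 3.0f, -4.0f };
    float sum = 0.0f, sum2 = 0.0f;
    for (float v : x) { sum += v; sum2 += v*v; }
    std::printf("mean of squares = %.3f\n", sum2 / 4.0f); // 7.500  (legacy path, sign lost)
    std::printf("mean activation = %.3f\n", sum  / 4.0f); // -0.500 (new path, sign kept)
    return 0;
}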
@@ -282,6 +298,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             e.counts.resize(n_as, e.counts[0]);
         }
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);
             e.counts.resize(n_as, 0);
         }
@@ -313,6 +330,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 e.counts[ex]++;
 
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
+                    e.activations[e_start + j] += x[j];
                     e.values[e_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[e_start + j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
@@ -338,6 +356,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         const int64_t n_mat = src1->ne[2] * src1->ne[3];
 
         if (e.values.empty()) {
+            e.activations.resize(src1->ne[0] * n_mat, 0);
             e.values.resize(src1->ne[0] * n_mat, 0);
             e.counts.resize(n_mat, 0);
         }
@@ -359,6 +378,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->ne[3]);
                 e.counts[mat_id]++;
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
+                    e.activations[mat_start + j] += x[j];
                     e.values[mat_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
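
Note (not part of the commit): the two collection paths above (the GGML_OP_MUL_MAT_ID branch and the plain mul_mat branch) now accumulate the same pair of running sums per column, at the offset of the matrix the row belongs to. The pattern, reduced to a single matrix slice with illustrative names:

#include <cstdint>
#include <vector>

struct Slice {
    std::vector<float> activations; // running sum of x[j]        per column
    std::vector<float> values;      // running sum of x[j] * x[j] per column
    int64_t            count = 0;   // rows accumulated
};

static void accumulate_row(Slice & s, const float * x, int64_t ncols) {
    if (s.values.empty()) {
        s.activations.resize(ncols, 0.0f);
        s.values.resize(ncols, 0.0f);
    }
    s.count++;
    for (int64_t j = 0; j < ncols; ++j) {
        s.activations[j] += x[j];        // new: raw sum
        s.values[j]      += x[j] * x[j]; // existing: sum of squares
    }
}

int main() {
    Slice s;
    const float row[3] = { 0.5f, -1.0f, 2.0f };
    accumulate_row(s, row, 3);
    accumulate_row(s, row, 3);
    // s.activations == {1.0, -2.0, 4.0}, s.values == {0.5, 2.0, 8.0}, s.count == 2
    return 0;
}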
@@ -532,6 +552,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         }
 
         to_store.push_back(kv.first);
+        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
     }
@@ -584,6 +605,16 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
 
             gguf_add_tensor(ctx_gguf, in_sum2);
             gguf_add_tensor(ctx_gguf, counts);
+
+            if (!stat.activations.empty()) {
+                const int32_t nact = (int32_t) stat.activations.size();
+                struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
+                ggml_format_name(in_sum, "%s.in_sum", name.c_str()); // ToDo: consider a better name. 'in_act' maybe?
+                for (int32_t j = 0; j < nval; ++j) {
+                    ((float *) in_sum->data)[j] = (float) stat.activations[j];
+                }
+                gguf_add_tensor(ctx_gguf, in_sum);
+            }
         }
     }
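
Note (not part of the commit): after this change an imatrix GGUF can carry up to three tensors per tracked weight: <name>.in_sum2, <name>.counts, and, when activation sums were collected, <name>.in_sum. A standalone sketch for inspecting which of them a given file actually contains, assuming ggml's gguf.h API:

#include <cstdint>
#include <cstdio>
#include "gguf.h"

int main(int argc, char ** argv) {
    if (argc < 2) {
        std::fprintf(stderr, "usage: %s imatrix.gguf\n", argv[0]);
        return 1;
    }

    struct gguf_init_params params = { /*no_alloc =*/ true, /*ctx =*/ nullptr };
    struct gguf_context * ctx_gguf = gguf_init_from_file(argv[1], params);
    if (!ctx_gguf) {
        std::fprintf(stderr, "failed to load %s\n", argv[1]);
        return 1;
    }

    // Files written before this commit list only ".in_sum2" and ".counts" entries.
    for (int64_t i = 0; i < gguf_get_n_tensors(ctx_gguf); ++i) {
        std::printf("%s\n", gguf_get_tensor_name(ctx_gguf, i));
    }

    gguf_free(ctx_gguf);
    return 0;
}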

@@ -722,14 +753,15 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         }
     }
 
+    const std::string in_sum_suffix{ ".in_sum" };
     const std::string in_sum2_suffix{ ".in_sum2" };
     const std::string counts_suffix{ ".counts" };
 
     // Could re-use m_stats instead, but this allows
    // checking for completeness of *each* loaded imatrix file
     // and also makes it easier to re-use a similar implementation in quantize.cpp
     // Using an ordered map to get a deterministic iteration order.
-    std::map<std::string, std::pair<struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
+    std::map<std::string, std::tuple<struct ggml_tensor *, struct ggml_tensor *, struct ggml_tensor *>> sums_counts_for;
 
     for (struct ggml_tensor * cur = ggml_get_first_tensor(ctx); cur; cur = ggml_get_next_tensor(ctx, cur)) {
         std::string name = cur->name;
@@ -738,19 +770,24 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
 
         if (string_remove_suffix(name, in_sum2_suffix)) {
             // in_sum2
-            sums_counts_for[std::move(name)].first = cur;
+            std::get<0>(sums_counts_for[std::move(name)]) = cur;
         } else if (string_remove_suffix(name, counts_suffix)) {
             // counts
-            sums_counts_for[std::move(name)].second = cur;
-        } else {
+            std::get<1>(sums_counts_for[std::move(name)]) = cur;
+        } else if (string_remove_suffix(name, in_sum_suffix)) {
+            // in_sum
+            std::get<2>(sums_counts_for[std::move(name)]) = cur;
+        }
+        else {
             // ignore other tensors
         }
     }
 
     for (const auto & sc : sums_counts_for) {
         const std::string & name = sc.first;
-        const struct ggml_tensor * in_sum2 = sc.second.first;
-        const struct ggml_tensor * counts = sc.second.second;
+        const struct ggml_tensor * in_sum2 = std::get<0>(sc.second);
+        const struct ggml_tensor * counts = std::get<1>(sc.second);
+        const struct ggml_tensor * in_sum = std::get<2>(sc.second);
 
         if (!in_sum2 || !counts) {
             LOG_ERR("%s: mismatched sums and counts for %s\n", __func__, name.c_str());
@@ -764,6 +801,7 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         int64_t nval = ggml_nelements(in_sum2);
         if (e.values.empty()) {
             e.values.resize(nval, 0.0f);
+            e.activations.resize(nval, 0.0f);
         } else if ((size_t) nval != e.values.size()) {
             LOG_ERR("%s: mismatched sums size for %s: %zu != %zu\n", __func__, name.c_str(), (size_t) nval, e.values.size());
             gguf_free(ctx_gguf);
@@ -791,6 +829,12 @@ bool IMatrixCollector::load_imatrix(const char * file_name) {
         for (int64_t j = 0; j < ncounts; j++) {
             e.counts[j] += std::lround(((const float *) counts->data)[j]);
         }
+        // ToDo: fix blow up when GGUF does not have in_sum
+        if (in_sum->data != nullptr) {
+            for (int64_t j = 0; j < nval; j++) {
+                e.activations[j] += ((const float *) in_sum->data)[j];
+            }
+        }
     }
 
     // TODO: extract into its own method; this is also used by the legacy format
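
Note (not part of the commit): the ToDo above is about files written before this change. For those, no <name>.in_sum tensor is found, the third tuple slot stays a null ggml_tensor pointer, and in_sum->data would dereference a null pointer before the data check can help. A minimal sketch of the kind of guard that would avoid that; fake_tensor and add_in_sum are stand-ins, not llama.cpp code:

#include <cstdint>
#include <cstdio>
#include <vector>

struct fake_tensor { void * data = nullptr; }; // stand-in for ggml_tensor

static void add_in_sum(std::vector<float> & activations, const fake_tensor * in_sum, int64_t nval) {
    if (in_sum && in_sum->data != nullptr) { // check the pointer itself first
        const float * src = (const float *) in_sum->data;
        for (int64_t j = 0; j < nval; j++) {
            activations[j] += src[j];
        }
    } // otherwise: nothing to add, older file without activation sums
}

int main() {
    std::vector<float> acc(4, 0.0f);
    add_in_sum(acc, nullptr, 4);            // older imatrix: no in_sum tensor at all
    std::printf("acc[0] = %.1f\n", acc[0]); // prints 0.0, no crash
    return 0;
}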
