
Commit dcac206

Add --activation-statistics logic to avoid doubling the imatrix size by default
1 parent 6fe51e1 commit dcac206
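
The change gates collection and storage of the raw activation sums (the in_sum tensors) behind a new accessor, activation_statistics(), which reads a boolean out of common_params. The field itself and the --activation-statistics command-line plumbing are not part of this diff, so the declaration below is only an assumed sketch of what the collector relies on, not the actual common.h change:

    // Assumed, not shown in this commit: a flag carried in common_params,
    // presumably set by a --activation-statistics command-line option.
    struct common_params {
        // ... other existing fields ...
        bool activation_statistics = false; // opt-in: also accumulate raw activation sums (in_sum)
    };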

File tree

1 file changed: +7 -7 lines changed


tools/imatrix/imatrix.cpp

Lines changed: 7 additions & 7 deletions
@@ -64,6 +64,7 @@ class IMatrixCollector {
 public:
     IMatrixCollector() = default;
     void set_params(common_params params) { m_params = std::move(params); }
+    bool activation_statistics() const { return m_params.activation_statistics; }
     bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
     void save_imatrix_legacy(int32_t ncall = -1) const;
     void save_imatrix(int32_t n_chunk = -1) const;
@@ -429,9 +430,8 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             // broadcast, when loading an old imatrix
             e.counts.resize(n_as, e.counts[0]);
         }
-        // ToDo: find an efficient way to implement --activation-statistics to avoid doubling the imatrix size by default
         if (e.values.empty()) {
-            e.activations.resize(src1->ne[0]*n_as, 0);
+            if (activation_statistics()) e.activations.resize(src1->ne[0]*n_as, 0);
             e.values.resize(src1->ne[0]*n_as, 0);
             e.counts.resize(n_as, 0);
         }
@@ -463,7 +463,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
                 e.counts[ex]++;
 
                 for (int64_t j = 0; j < src1->ne[0]; ++j) {
-                    e.activations[e_start + j] += x[j];
+                    if (activation_statistics()) e.activations[e_start + j] += x[j];
                     e.values[e_start + j] += x[j] * x[j];
                     if (!std::isfinite((float)e.values[e_start + j])) {
                         LOG_ERR("%f detected in %s\n", (float)e.values[e_start + j], wname.c_str());
@@ -503,7 +503,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
             }
         }
         if (e.values.empty()) {
-            e.activations.resize(src1->ne[0] * n_mat, 0);
+            if (activation_statistics()) e.activations.resize(src1->ne[0] * n_mat, 0);
             e.values.resize(src1->ne[0] * n_mat, 0);
             e.counts.resize(1, 0);
         }
@@ -522,7 +522,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
         for (int64_t row = 0; row < src1->ne[1]; ++row) {
             const float * x = (const float *) (data + row * src1->nb[1] + i2 * src1->nb[2] + i3 * src1->nb[3]);
             for (int64_t j = 0; j < src1->ne[0]; ++j) {
-                e.activations[mat_start + j] += x[j];
+                if (activation_statistics()) e.activations[mat_start + j] += x[j];
                 e.values[mat_start + j] += x[j] * x[j];
                 if (!std::isfinite((float)e.values[j])) {
                     LOG_ERR("%f detected in %s\n", (float)e.values[j], wname.c_str());
@@ -704,7 +704,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         }
 
         to_store.push_back(kv.first);
-        data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
+        if (activation_statistics()) data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.activations.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.values.size(), GGML_MEM_ALIGN);
         data_size += GGML_PAD(ggml_tensor_overhead() + sizeof(float) * kv.second.counts.size(), GGML_MEM_ALIGN);
     }
@@ -758,7 +758,7 @@ void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
         gguf_add_tensor(ctx_gguf, in_sum2);
         gguf_add_tensor(ctx_gguf, counts);
 
-        if (!stat.activations.empty()) {
+        if (!stat.activations.empty() && activation_statistics()) {
            const int32_t nact = (int32_t) stat.activations.size();
            struct ggml_tensor * in_sum = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nact / nmat, nmat);
            ggml_format_name(in_sum, "%s.in_sum", name.c_str());
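
With these edits, the squared sums (in_sum2) and the counts are always collected, while the activations buffer is only resized, accumulated, and written to the GGUF file when activation statistics are requested, so a default run no longer pays double the storage. A minimal standalone sketch of that gating pattern, using hypothetical names and no ggml/GGUF dependencies, might look like this:

    #include <cstdio>
    #include <vector>

    // Simplified stand-in for the per-tensor statistics kept by IMatrixCollector.
    struct Stats {
        std::vector<float> activations; // raw sums     (in_sum)  - optional
        std::vector<float> values;      // squared sums (in_sum2) - always kept
    };

    // Accumulate one row of n activations; collect_sums plays the role of activation_statistics().
    static void accumulate(Stats & e, const float * x, int n, bool collect_sums) {
        if (e.values.empty()) {
            if (collect_sums) e.activations.resize(n, 0.0f); // only allocate in_sum when asked
            e.values.resize(n, 0.0f);
        }
        for (int j = 0; j < n; ++j) {
            if (collect_sums) e.activations[j] += x[j];
            e.values[j] += x[j] * x[j];
        }
    }

    int main() {
        Stats e;
        const float row[4] = {0.5f, -1.0f, 2.0f, 0.25f};
        accumulate(e, row, 4, /*collect_sums=*/false); // default: no in_sum buffer at all
        std::printf("activations stored: %zu, values stored: %zu\n",
                    e.activations.size(), e.values.size());
        return 0;
    }

Checking the flag at each accumulation site, rather than branching once around the whole loop, mirrors the structure of the diff: the per-element test is cheap and keeps the change minimal.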
