Skip to content

Commit 006e7ef

Browse files
committed
Improve compute_vector_statistics() processing of mismatched tensor sizes
1 parent 2a6f5d7 commit 006e7ef

File tree

1 file changed

+60
-59
lines changed

1 file changed

+60
-59
lines changed

tools/imatrix/imatrix.cpp

Lines changed: 60 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,7 @@ static std::vector<float> compute_tensor_averages(const Stats & tstats) {
166166
static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
167167
const size_t n_mat = e.counts.size();
168168
const size_t len = e.activations.empty() ? e.values.size() : e.activations.size();
169+
const bool legacy = e.activations.empty();
169170
if (n_mat == 0) {
170171
LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
171172
return false;
@@ -174,91 +175,91 @@ static bool compute_vector_statistics(std::vector<tensor_statistics> & tstats, c
174175
LOG_ERR("%s: activation size mismatch for tensor %s (len=%zu, counts=%zu)\n", __func__, name.c_str(), len, n_mat);
175176
return false;
176177
}
178+
if (!legacy && e.values.size() != len) {
179+
LOG_ERR("%s: activations/values size mismatch for tensor %s (act=%zu, val=%zu)\n", __func__, name.c_str(), len, e.values.size());
180+
return false;
181+
}
177182

178183
const size_t row_size = len / n_mat;
179-
std::vector<float> activations;
180-
activations.reserve(len);
181-
184+
double mean = 0.0;
185+
double M2 = 0.0;
186+
double sum = 0.0;
187+
float vmin = std::numeric_limits<float>::infinity();
188+
float vmax = -std::numeric_limits<float>::infinity();
189+
double energy_sum = 0.0;
190+
size_t valid_n = 0;
182191
for (size_t i = 0; i < n_mat; ++i) {
183192
const auto c = (float)e.counts[i];
193+
if (c <= 0.0f) { continue; } // skip experts with zero count
184194
const size_t off = i * row_size;
185-
if (c <= 0.0f) {
186-
activations.insert(activations.end(), row_size, 0.0f);
187-
continue;
188-
}
189-
if (e.activations.empty()) {
190-
for (size_t j = 0; j < row_size; ++j) {
191-
activations.push_back(e.values[off + j] / c); // mean-of-squares
192-
}
193-
} else {
194-
for (size_t j = 0; j < row_size; ++j) {
195-
activations.push_back(e.activations[off + j] / c); // mean
196-
}
195+
196+
for (size_t j = 0; j < row_size; ++j) {
197+
const double v_avg = legacy ? 0.0 : (double)e.activations[off + j] / (double)c; // E[x]
198+
const double v_energy = (double)e.values[off + j] / (double)c; // E[x^2]
199+
const double v = legacy ? v_energy : v_avg;
200+
201+
++valid_n;
202+
sum += v;
203+
vmin = std::min(vmin, (float)v);
204+
vmax = std::max(vmax, (float)v);
205+
206+
const double delta = v - mean;
207+
mean += delta / (double)valid_n;
208+
M2 += delta * (v - mean);
209+
energy_sum += std::max(0.0, v_energy);
197210
}
198211
}
199212

200-
if (activations.empty()) {
201-
LOG_ERR("%s: computed empty activation vector for tensor %s\n", __func__, name.c_str());
213+
if (valid_n == 0) {
214+
LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
202215
return false;
203216
}
204217

205-
double sum = 0.0;
206-
float vmax = activations[0];
207-
float vmin = activations[0];
208-
for (float v : activations) {
209-
sum += v;
210-
vmax = std::max(vmax, v);
211-
vmin = std::min(vmin, v);
212-
}
213-
214-
const auto mean = (float)(sum / (double)activations.size());
215-
double sqr_sum = 0.0;
216-
for (const float v : activations) { sqr_sum += (double)v * (double)v; }
217-
double variance = sqr_sum / (double)activations.size() - (double)mean * (double)mean;
218-
variance = std::max(variance, 0.0);
219-
const float std_deviation = std::sqrt((float)variance);
220-
218+
float std_deviation = 0.0f;
221219
float entropy = 0.0f;
222-
if (e.activations.empty()) {
223-
double energy_sum = 0.0;
224-
for (float v : activations) { energy_sum += (double)std::max(0.0f, v); }
225-
if (energy_sum > 0.0) {
226-
for (const float v : activations) {
227-
const double p = std::max(0.0, (double)v) / energy_sum;
228-
if (p > 0.0) { entropy -= (float)(p * std::log2(p)); }
229-
}
230-
}
231-
} else {
232-
double energy_sum = 0.0;
233-
for (const float v : activations) { energy_sum += (double)v * (double)v; }
234-
if (energy_sum > 0.0) {
235-
for (const float v : activations) {
236-
const double p = (double)v * (double)v / energy_sum;
220+
double zd_count = 0.0;
221+
double variance = valid_n > 1 ? M2 / ((double)valid_n - 1) : 0.0;
222+
variance = std::max(variance, 0.0);
223+
std_deviation = std::sqrt((float)variance);
224+
if (energy_sum > 0.0) {
225+
for (size_t i = 0; i < n_mat; ++i) {
226+
const auto c = (float)e.counts[i];
227+
if (c <= 0.0f) { continue; }
228+
const size_t off = i * row_size;
229+
for (size_t j = 0; j < row_size; ++j) {
230+
const double v_energy = (double)e.values[off + j] / (double)c; // E[x^2]
231+
const double w = std::max(0.0, v_energy);
232+
const double p = w / energy_sum;
237233
if (p > 0.0) { entropy -= (float)(p * std::log2(p)); }
238234
}
239235
}
240236
}
241-
242-
// ZD score: fraction with |z| > 1
243-
double zd_count = 0.0;
244237
if (std_deviation > 0.0f) {
245-
for (const float v : activations) {
246-
const float z = (v - mean) / std_deviation;
247-
if (std::fabs(z) > 1.0f) { zd_count += 1.0; }
238+
for (size_t i = 0; i < n_mat; ++i) {
239+
const float c = (float)e.counts[i];
240+
if (c <= 0.0f) { continue; }
241+
const size_t off = i * row_size;
242+
for (size_t j = 0; j < row_size; ++j) {
243+
const double v_avg = legacy ? 0.0 : (double)e.activations[off + j] / (double)c; // E[x]
244+
const double v_energy = (double)e.values[off + j] / (double)c; // E[x^2]
245+
const float v = (float)(legacy ? v_energy : v_avg);
246+
const float z = (v - (float)mean) / std_deviation;
247+
if (std::fabs(z) > 1.0f) { zd_count += 1.0; }
248+
}
248249
}
249250
}
250251

251252
auto & ts = tstats.emplace_back();
252253
ts.tensor = name;
253254
ts.stats = e;
254255
ts.sum_values = (float)sum;
255-
ts.mean_values = mean;
256+
ts.mean_values = (float)mean;
256257
ts.max_values = vmax;
257258
ts.min_values = vmin;
258-
ts.elements = (int)activations.size();
259+
ts.elements = valid_n;
259260
ts.std_deviation = std_deviation;
260261
ts.entropy = entropy;
261-
ts.zd_score = ts.elements > 0 ? (float)(zd_count / (double)ts.elements) : 0.0f;
262+
ts.zd_score = (float)(zd_count / (double)valid_n);
262263

263264
return e.activations.empty();
264265
}
@@ -267,7 +268,7 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
267268
static const std::regex pattern(R"(blk\.(\d+)\.)");
268269
for (auto & ts : tstats) {
269270
ts.cossim = 0.0f;
270-
ts.l2_norm = 0.0f;
271+
ts.l2_dist = 0.0f;
271272

272273
if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
273274
const int blk = std::stoi(match[1]);
@@ -309,7 +310,7 @@ static void compute_tensor_statistics(std::vector<tensor_statistics> & tstats) {
309310
ts.cossim = cs;
310311

311312
// Compute L2 Norm (Euclidean Distance)
312-
ts.l2_norm = std::sqrt(l2_dist_sq);
313+
ts.l2_dist = std::sqrt(l2_dist_sq);
313314
}
314315
}
315316
}

0 commit comments

Comments
 (0)