Skip to content

Commit f7b1ab2

Browse files
committed
Re-implement changes on top of #9400
1 parent 3c1e250 commit f7b1ab2

File tree

1 file changed

+263
-5
lines changed

1 file changed

+263
-5
lines changed

tools/imatrix/imatrix.cpp

Lines changed: 263 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
#include <fstream>
1717
#include <unordered_map>
1818
#include <map>
19+
#include <regex>
20+
#include <numeric>
1921

2022
#if defined(_MSC_VER)
2123
#pragma warning(disable: 4244 4267) // possible loss of data
@@ -24,10 +26,10 @@
2426
static void print_usage(int, char ** argv) {
2527
LOG("\nexample usage:\n");
2628
LOG("\n %s \\\n"
27-
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] \\\n"
28-
" [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
29-
" [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n"
30-
" [--parse-special]\n" , argv[0]);
29+
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--process-output] [--no-ppl] \\\n"
30+
" [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics] \\\n"
31+
" [--no-ppl] [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] \\\n"
32+
" [--parse-special] [...]\n" , argv[0]);
3133
LOG("\n");
3234
}
3335

@@ -40,6 +42,21 @@ struct Stats {
4042
std::vector<int64_t> counts;
4143
};
4244

45+
// Derived per-tensor activation statistics, computed from the accumulated
// imatrix data by compute_statistics() / compute_cossim().
struct tensor_statistics {
    std::string tensor;         // full tensor name, e.g. "blk.0.attn_q.weight"
    Stats stats;                // raw accumulated values/counts this entry was derived from
    float total_sqract = 0.0f;  // sum of chunk-averaged squared activations
    float mean_sqract = 0.0f;   // mean of chunk-averaged squared activations
    float max_sqract = 0.0f;    // maximum observed (averaged) squared activation
    float min_sqract = 0.0f;    // minimum observed (averaged) squared activation
    int elements = 0;           // number of activation entries considered
    float stddev = 0.0f;        // standard deviation of the averaged squared activations
    float active = 0.0f;        // fraction of activations above the "inactive" threshold [0..1]
    float entropy = 0.0f;       // Shannon entropy (bits) of the normalized activation distribution
    float zd = 0.0f;            // fraction of activations more than one stddev above the mean
    float cossim = 0.0f;        // cosine similarity vs. the same tensor in the previous block (0 if none)
};
59+
4360
class IMatrixCollector {
4461
public:
4562
IMatrixCollector() = default;
@@ -49,6 +66,7 @@ class IMatrixCollector {
4966
void save_imatrix(int32_t n_chunk = -1) const;
5067
bool load_imatrix_legacy(const char * fname);
5168
bool load_imatrix(const char * file_name);
69+
const std::unordered_map<std::string, Stats> & get_mstats() const { return m_stats; }
5270
private:
5371
std::unordered_map<std::string, Stats> m_stats;
5472
common_params m_params;
@@ -78,6 +96,127 @@ static std::string filter_tensor_name(const char * name) {
7896
return wname;
7997
}
8098

99+
static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
100+
std::vector<std::string> name;
101+
std::istringstream stream(input);
102+
std::string item;
103+
104+
while (std::getline(stream, item, '.')) {
105+
name.push_back(item);
106+
}
107+
for (size_t i = 0; i < name.size(); ++i) {
108+
if (name[i] == "blk" && i + 1 < name.size()) {
109+
layer = name[i + 1];
110+
break;
111+
}
112+
}
113+
for (size_t i = 0; i < name.size(); ++i) {
114+
if (name[i] == "weight" && i > 0) {
115+
tensor = name[i - 1];
116+
break;
117+
}
118+
}
119+
120+
if (tensor.empty()) {
121+
tensor = input;
122+
}
123+
if (layer.empty()) {
124+
layer = "-";
125+
}
126+
}
127+
128+
// Compute per-tensor activation statistics (total/mean/min/max of the
// chunk-averaged squared activations, stddev, active ratio, entropy and
// z-score outlier ratio) from accumulated imatrix data and append the
// result to tstats. Logs an error and appends nothing on malformed input.
static void compute_statistics(std::vector<tensor_statistics> & tstats, const std::string & name, const Stats & e) {
    // check for empty counts FIRST: the divisibility check below performs a
    // modulo by counts.size() and would divide by zero otherwise
    if (e.counts.empty()) {
        LOG_ERR("%s: there are no activations for tensor %s. The imatrix may be suboptimal\n", __func__, name.c_str());
        return;
    }
    // values must split evenly into one row per count entry
    if (e.values.size() % e.counts.size() != 0) {
        LOG_ERR("%s: activation size mismatch for tensor %s (%zu vs %zu)\n", __func__, name.c_str(), e.counts.size(), e.values.size());
        return;
    }

    const int n_mat    = e.counts.size();
    const int row_size = e.values.size() / n_mat;

    // average the accumulated squared activations over the number of chunks
    std::vector<float> activations;
    activations.reserve(e.values.size());

    for (int i = 0; i < n_mat; ++i) {
        for (int j = 0; j < row_size; ++j) {
            activations.push_back(e.values[i*row_size + j] / e.counts[i]);
        }
    }

    const float act_total     = std::accumulate(activations.begin(), activations.end(), 0.0f);
    const float act_max       = *std::max_element(activations.begin(), activations.end());
    const float act_min       = *std::min_element(activations.begin(), activations.end());
    const float act_mean      = act_total / activations.size();
    const float act_sqr_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
    // clamp the variance at 0 to guard against negative values from float rounding
    const float act_var       = (act_sqr_total / activations.size()) - (act_mean * act_mean);
    const float act_dev       = std::sqrt(std::max(0.0f, act_var));

    // activations with magnitude at or below this are considered inactive
    constexpr float threshold = 1e-5f;
    const int inactive_count = std::count_if(activations.begin(), activations.end(),
                                             [](const float v) { return fabsf(v) <= threshold; });
    const float active_ratio = 1 - static_cast<float>(inactive_count) / activations.size();

    // Shannon entropy (bits) of the activations normalized to a distribution
    float entropy = 0;
    if (act_total > 0) {
        for (const auto act : activations) {
            if (const float p = act / act_total; p > 0) {
                entropy -= p * std::log2(p);
            }
        }
    }

    // count activations more than one standard deviation above the mean
    int z_score = 0;
    if (act_dev > 0.0f) {
        for (const auto act : activations) {
            if (const float p = (act - act_mean) / act_dev; p > 1) {
                z_score++;
            }
        }
    }

    auto & ts = tstats.emplace_back();
    ts.tensor       = name;
    ts.stats        = e;
    ts.total_sqract = act_total;
    ts.mean_sqract  = act_mean;
    ts.max_sqract   = act_max;
    ts.min_sqract   = act_min;
    ts.elements     = static_cast<int>(activations.size());
    ts.stddev       = act_dev;
    ts.active       = active_ratio;
    ts.entropy      = entropy;
    ts.zd           = static_cast<float>(z_score) / ts.elements;
}
194+
195+
// For every tensor belonging to block N, compute the cosine similarity
// between its accumulated activations and those of the same tensor in
// block N-1. Tensors with no matching previous-block entry, mismatched
// activation sizes, or a zero-magnitude vector keep cossim == 0.
static void compute_cossim(std::vector<tensor_statistics> & tstats) {
    static const std::regex pattern(R"(blk\.(\d+)\.)");
    for (auto & ts : tstats) {
        if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
            const int blk = std::stoi(match[1]);
            // build the name of the same tensor one block earlier
            std::string tname(ts.tensor);
            tname.replace(match.position(1), match.length(1), std::to_string(blk-1));
            auto prev = std::find_if(tstats.begin(), tstats.end(),
                                     [&tname](const tensor_statistics & t) { return t.tensor == tname; });
            // require matching sizes: inner_product over ranges of different
            // length would read past the end of the shorter vector
            if (prev != tstats.end() && prev->stats.values.size() == ts.stats.values.size()) {
                const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                                    prev->stats.values.begin(), 0.0f);
                const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(),
                                                                    ts.stats.values.begin(), 0.0f));
                const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(),
                                                                    prev->stats.values.begin(), 0.0f));
                // guard against division by zero for all-zero activation vectors
                if (curr_mag > 0.0f && prev_mag > 0.0f) {
                    ts.cossim = dp / (curr_mag * prev_mag);
                } else {
                    ts.cossim = 0;
                }
            }
        } else {
            ts.cossim = 0;
        }
    }
}
219+
81220
bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
82221
GGML_UNUSED(user_data);
83222

@@ -678,7 +817,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
678817
return g_collector.collect_imatrix(t, ask, user_data);
679818
}
680819

681-
682820
struct results_log_softmax {
683821
double log_softmax;
684822
float logit;
@@ -926,6 +1064,113 @@ static bool compute_imatrix(llama_context * ctx, const common_params & params, c
9261064
return true;
9271065
}
9281066

1067+
static bool show_statistics(const common_params & params) {
1068+
std::vector<tensor_statistics> ts;
1069+
if (params.in_files.empty() || params.in_files.size() > 1) {
1070+
LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
1071+
return false;
1072+
}
1073+
if (g_collector.load_imatrix(params.in_files[0].c_str())) {
1074+
for (const auto & [name, stats] :g_collector.get_mstats()) {
1075+
compute_statistics(ts, name, stats);
1076+
}
1077+
} else {
1078+
LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
1079+
return false;
1080+
}
1081+
if (!ts.empty()) {
1082+
compute_cossim(ts);
1083+
} else {
1084+
LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
1085+
return false;
1086+
}
1087+
1088+
struct tensor_comparer {
1089+
bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
1090+
std::string layer, name_a, name_b;
1091+
;
1092+
process_tensor_name(a.tensor, layer, name_a);
1093+
process_tensor_name(b.tensor, layer, name_b);
1094+
return name_a < name_b || (name_a == name_b && a.total_sqract > b.total_sqract);
1095+
}
1096+
};
1097+
std::sort(ts.begin(), ts.end(), tensor_comparer());
1098+
1099+
struct weighted_stats {
1100+
float weighted_bias = 0.0f;
1101+
float weighted_zd = 0.0f;
1102+
float weighted_cossim = 0.0f;
1103+
int total_elements = 0;
1104+
};
1105+
std::map<int, weighted_stats> ws;
1106+
1107+
LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
1108+
LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", " Layer", " Tensor", " Σ(Act²)",
1109+
" Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD",
1110+
" CosSim");
1111+
LOG_INF(
1112+
"=============================================================================================================="
1113+
"===========================================================\n");
1114+
for (const auto & tstat : ts) {
1115+
std::string layer, name;
1116+
process_tensor_name(tstat.tensor, layer, name);
1117+
1118+
int blk;
1119+
try {
1120+
blk = std::stoi(layer);
1121+
} catch (const std::exception & e) {
1122+
blk = -1; // not a block layer
1123+
}
1124+
1125+
LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
1126+
layer.c_str(), name.c_str(), tstat.total_sqract, tstat.min_sqract, tstat.max_sqract, tstat.mean_sqract,
1127+
tstat.stddev, tstat.active * 100.0f, tstat.elements, tstat.entropy,
1128+
100.0f * (tstat.entropy / std::log2(tstat.elements)), 100.0f * tstat.zd, tstat.cossim);
1129+
1130+
const float weighted_bias = tstat.elements * tstat.total_sqract;
1131+
const float weighted_zd = tstat.elements * tstat.zd;
1132+
const float weighted_cossim = tstat.elements * tstat.cossim;
1133+
1134+
if (ws.find(blk) != ws.end()) {
1135+
ws[blk].weighted_bias += weighted_bias;
1136+
ws[blk].weighted_zd += weighted_zd;
1137+
ws[blk].weighted_cossim += weighted_cossim;
1138+
ws[blk].total_elements += tstat.elements;
1139+
} else {
1140+
weighted_stats temp_ws;
1141+
temp_ws.weighted_bias = weighted_bias;
1142+
temp_ws.weighted_zd = weighted_zd;
1143+
temp_ws.weighted_cossim = weighted_cossim;
1144+
temp_ws.total_elements = tstat.elements;
1145+
ws[blk] = temp_ws;
1146+
}
1147+
}
1148+
1149+
const int layers = std::count_if(ws.begin(), ws.end(), [](const auto & kv) { return kv.first >= 0; });
1150+
LOG_INF("\nComputing weighted average statistics per layer (%d layers)\n", layers);
1151+
LOG_INF("\n%s\t%s\t%s\t%s\n", " Layer", " μΣ(Act²)", " μZD", "μCosSim");
1152+
LOG_INF("================================================\n");
1153+
for (const auto & [first, second] : ws) {
1154+
const auto & layer = first;
1155+
const auto & stats = second;
1156+
1157+
if (stats.total_elements == 0) {
1158+
continue;
1159+
}
1160+
1161+
if (layer >= 0) {
1162+
const float bias = stats.weighted_bias / stats.total_elements;
1163+
const float zd = stats.weighted_zd / stats.total_elements;
1164+
const float cossim = stats.weighted_cossim / stats.total_elements;
1165+
1166+
LOG_INF("%5d\t%14.2f\t%10.4f%%\t%6.4f\n", layer, bias, 100.0f * zd, cossim);
1167+
}
1168+
}
1169+
LOG_INF("\n");
1170+
1171+
return true;
1172+
}
1173+
9291174
int main(int argc, char ** argv) {
9301175
common_params params;
9311176

@@ -938,6 +1183,19 @@ int main(int argc, char ** argv) {
9381183
return 1;
9391184
}
9401185

1186+
if (params.in_files.empty() || params.model.path.empty()) {
1187+
LOG_ERR("%s: an input file is required", __func__);
1188+
print_usage(argc, argv);
1189+
return 1;
1190+
}
1191+
1192+
if (params.show_statistics) {
1193+
if (!show_statistics(params)) {
1194+
return 1;
1195+
}
1196+
return 0;
1197+
}
1198+
9411199
common_init();
9421200

9431201
const int32_t n_ctx = params.n_ctx;

0 commit comments

Comments
 (0)