Skip to content

Commit 5bb2def

Browse files
committed
Add --activation-statistics parameter
1 parent dadd90e commit 5bb2def

File tree

3 files changed

+14
-5
lines changed

3 files changed

+14
-5
lines changed

common/arg.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2707,6 +2707,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
27072707
params.show_statistics = true;
27082708
}
27092709
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
2710+
// Register the --activation-statistics switch, tagged for LLAMA_EXAMPLE_IMATRIX.
// The handler receives no value argument: passing the flag sets
// params.activation_statistics to true.
add_opt(common_arg(
    {"--activation-statistics"},
    // The "(default: ...)" text must report this option's own field; reading
    // params.show_statistics here would print the default of --show-statistics.
    string_format("generate data to compute activation-based statistics (default: %s)", params.activation_statistics ? "true" : "false"),
    [](common_params & params) {
        params.activation_statistics = true;
    }
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
27102717
add_opt(common_arg(
27112718
{"--parse-special"},
27122719
string_format("prase special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),

common/common.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -443,10 +443,11 @@ struct common_params {
443443
int32_t i_chunk = 0; // start processing from this chunk
444444
int8_t imat_dat = 0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)
445445

446-
bool process_output = false; // collect data for the output tensor
447-
bool compute_ppl = true; // whether to compute perplexity
448-
bool show_statistics = false; // show imatrix statistics per tensor
449-
bool parse_special = false; // whether to parse special tokens during imatrix tokenization
446+
bool process_output = false; // collect data for the output tensor
447+
bool compute_ppl = true; // whether to compute perplexity
448+
bool show_statistics = false; // show imatrix statistics per tensor
449+
bool activation_statistics = false; // generate data to calculate activation based statistics
450+
bool parse_special = false; // whether to parse special tokens during imatrix tokenization
450451

451452
// cvector-generator params
452453
int n_pca_batch = 100;

tools/imatrix/imatrix.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ static void print_usage(int, char ** argv) {
3030
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
3131
" [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
3232
" [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
33-
" [--show-statistics] [...]\n" , argv[0]);
33+
" [--activation-statistics] [--show-statistics] [...]\n" , argv[0]);
3434
LOG("\n");
3535
}
3636

@@ -428,6 +428,7 @@ bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void *
428428
// broadcast, when loading an old imatrix
429429
e.counts.resize(n_as, e.counts[0]);
430430
}
431+
// TODO: find an efficient way to implement --activation-statistics that avoids doubling the imatrix size by default
431432
if (e.values.empty()) {
432433
e.activations.resize(src1->ne[0]*n_as, 0);
433434
e.values.resize(src1->ne[0]*n_as, 0);

0 commit comments

Comments
 (0)