Skip to content

Commit 53f65c3

Browse files
committed
imatrix : use GGUF by default
Still uses the old format when the output filename ends with .dat, but this can be overridden with --output-format.
1 parent a12363b commit 53f65c3

File tree

4 files changed

+24
-5
lines changed

4 files changed

+24
-5
lines changed

common/arg.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2627,6 +2627,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26272627
params.n_out_freq = value;
26282628
}
26292629
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
2630+
// Registers the --output-format option (imatrix example only).
// The handler maps the textual value onto params.imat_out_type; when the option
// is not given, imat_out_type keeps its default (COMMON_IMATRIX_FORMAT_AUTO).
add_opt(common_arg(
2631+
{"--output-format"}, "{gguf,dat}",
2632+
string_format("output format for imatrix file (default: gguf except when output filename ends with .dat)"),
2633+
[](common_params & params, const std::string & value) {
2634+
/**/ if (value == "gguf") { params.imat_out_type = COMMON_IMATRIX_FORMAT_GGUF; }
2635+
else if (value == "dat") { params.imat_out_type = COMMON_IMATRIX_FORMAT_DAT; }
2636+
else { throw std::invalid_argument("invalid output format"); } // only "gguf" and "dat" are accepted
2637+
}
2638+
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
26302639
add_opt(common_arg(
26312640
{"--save-frequency"}, "N",
26322641
string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),

common/common.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,12 @@ enum common_reasoning_format {
233233
COMMON_REASONING_FORMAT_DEEPSEEK, // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
234234
};
235235

236+
// Output format of the imatrix file (see common_params::imat_out_type, set via --output-format).
enum common_imatrix_format_type {
237+
COMMON_IMATRIX_FORMAT_AUTO, // resolved when saving: legacy format if the output filename ends with .dat, GGUF otherwise
238+
COMMON_IMATRIX_FORMAT_GGUF, // explicitly requested GGUF output
239+
COMMON_IMATRIX_FORMAT_DAT, // legacy format, written by save_imatrix_legacy
240+
};
241+
236242
struct common_params {
237243
int32_t n_predict = -1; // new tokens to predict
238244
int32_t n_ctx = 4096; // context size
@@ -431,6 +437,7 @@ struct common_params {
431437
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
432438
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
433439
int32_t i_chunk = 0; // start processing from this chunk
440+
common_imatrix_format_type imat_out_type = COMMON_IMATRIX_FORMAT_AUTO; // format of the output imatrix
434441

435442
bool process_output = false; // collect data for the output tensor
436443
bool compute_ppl = true; // whether to compute perplexity

tools/imatrix/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ More information is available in <https://github.com/ggml-org/llama.cpp/pull/486
77

88
```
99
./llama-imatrix \
10-
-m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \
10+
-m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \
1111
[--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \
1212
[--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \
1313
[--show-statistics] [...]
@@ -20,6 +20,7 @@ The parameters in square brackets are optional and have the following meaning:
2020
* `-lv | --verbosity` specifies the verbosity level. If set to `0`, no output other than the perplexity of the processed chunks will be generated. If set to `1`, each time the results are saved a message is written to `stderr`. If `>=2`, a message is output each time data is collected for any tensor. Default verbosity level is `1`.
2121
* `-o | --output-file` specifies the name of the file where the computed data will be stored. If missing, `imatrix.gguf` is used.
2222
* `-ofreq | --output-frequency` specifies how often the result computed so far is saved to disk. Default is 10 (i.e., every 10 chunks).
23+
* `--output-format` specifies the output format of the generated imatrix file. Either "gguf", or "dat" (the legacy format). Defaults to "gguf" unless the output filename ends with `.dat`.
2324
* `--save-frequency` specifies how often to save a copy of the imatrix in a separate file. Default is 0 (i.e., never).
2425
* `--process-output` specifies if data will be collected for the `output.weight` tensor. Typically, it is better not to utilize the importance matrix when quantizing `output.weight`, so this is set to `false` by default.
2526
* `--in-file` one or more existing imatrix files to load and combine. Useful for merging files from multiple runs/datasets.

tools/imatrix/imatrix.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
static void print_usage(int, char ** argv) {
2727
LOG("\nexample usage:\n");
2828
LOG("\n %s \\\n"
29-
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--no-ppl] \\\n"
29+
" -m model.gguf -f some-text.txt [-o imatrix.gguf] [--output-format {gguf,dat}] [--no-ppl] \\\n"
3030
" [--process-output] [--chunk 123] [--save-frequency 0] [--output-frequency 10] \\\n"
3131
" [--in-file imatrix-prev-0.gguf --in-file imatrix-prev-1.gguf ...] [--parse-special] \\\n"
3232
" [--show-statistics] [...]\n" , argv[0]);
@@ -492,13 +492,15 @@ void IMatrixCollector::save_imatrix_legacy(int32_t ncall) const {
492492

493493
void IMatrixCollector::save_imatrix(int32_t n_chunk) const {
494494
auto fname = m_params.out_file;
495+
auto imat_type = m_params.imat_out_type;
495496

496-
// TODO: use the new format in more cases
497-
if (!string_ends_with(fname, ".gguf")) {
498-
LOG_WRN("\n%s: saving to legacy imatrix format because output suffix is not .gguf\n", __func__);
497+
if ((imat_type == COMMON_IMATRIX_FORMAT_AUTO && string_ends_with(fname, ".dat")) ||
498+
(imat_type == COMMON_IMATRIX_FORMAT_DAT)) {
499+
LOG_WRN("\n%s: saving to legacy imatrix format\n", __func__);
499500
this->save_imatrix_legacy(n_chunk);
500501
return;
501502
}
503+
// else, default to GGUF imatrix
502504

503505
if (n_chunk > 0) {
504506
fname += ".at_";

0 commit comments

Comments
 (0)