Update quantize.cpp

nicoboss · web-flow · commit 3fb41619fc47 · 2025-07-31T21:46:20.000+02:00
The new imatrix GGUF format stores per-matrix token counts instead of per-tensor chunk counts, which makes it possible to fix NaNs for low bits-per-weight quants.
diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp
@@ -289,7 +289,7 @@ static int load_imatrix(const std::string & imatrix_file, std::vector<std::strin
             const float count = ((const float *) counts->data)[j];
             if (count > 0.0f) {
                 for (int64_t i = 0; i < ne0; ++i) {
-                    e[j*ne0 + i] = ((const float *) sums->data)[j*ne0 + i] / count;
+                    e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + 1.0f) / (count + 1.0f);
                 }
             } else {
                 // Partial imatrix data, this tensor never got any input during calibration

Original file line number	Diff line number	Diff line change
`@@ -289,7 +289,7 @@ static int load_imatrix(const std::string & imatrix_file, std::vector<std::strin`
`289`	`289`	`const float count = ((const float *) counts->data)[j];`
`290`	`290`	`if (count > 0.0f) {`
`291`	`291`	`for (int64_t i = 0; i < ne0; ++i) {`
`292`		`- e[j*ne0 + i] = ((const float *) sums->data)[j*ne0 + i] / count;`
	`292`	`+ e[j*ne0 + i] = (((const float *) sums->data)[j*ne0 + i] + 1.0f) / (count + 1.0f);`
`293`	`293`	`}`
`294`	`294`	`} else {`
`295`	`295`	`// Partial imatrix data, this tensor never got any input during calibration`