Fix scaler

horasal · horasal · commit d940de510114 · 2025-10-26T14:25:52.000+09:00
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
@@ -102,6 +102,10 @@ typedef sycl::half2 ggml_half2;
 #define QI_MXFP4 (QK_MXFP4 / (4 * QR_MXFP4))
 #define QR_MXFP4 2
 
+#define QI_MXFP6_E3M2 (QK_MXFP6_E3M2 * 3 / (4 * 4))
+// FIXME: QR (values per byte) is a placeholder: MXFP6 packs 4 six-bit values into 3 bytes, so the real ratio is 4/3, not 2
+#define QR_MXFP6_E3M2 2
+
 #define QI5_0 (QK5_0 / (4 * QR5_0))
 #define QR5_0 2
 
@@ -1103,6 +1107,8 @@ GGML_TABLE_BEGIN(int8_t, kvalues_mxfp4, 16)
     0, 1, 2, 3, 4, 6, 8, 12, 0, -1, -2, -3, -4, -6, -8, -12,
 GGML_TABLE_END()
 
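+// 16^(-1): scale factor for the integer entries of kvalues_mxfp6_e3m2 below (assumes the table is stored pre-multiplied by 16 -- TODO confirm)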
+#define MXFP6_SCALER 0.0625f
 GGML_TABLE_BEGIN(int16_t, kvalues_mxfp6_e3m2, 64)
     0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28,
     32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224,
diff --git a/ggml/src/ggml-cpu/arch/x86/quants.c b/ggml/src/ggml-cpu/arch/x86/quants.c
@@ -860,7 +860,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con
         int ib = 0;
         float sumf = 0;
 
-    #if defined __AVX2__
+    #if 0 //defined __AVX2__
         __m256 accum_ps = _mm256_setzero_ps();
 
         for (; ib + 1 < nb; ib += 2) {
@@ -870,7 +870,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con
             const block_mxfp6_e3m2 * x2 = &x[ib + 1];
             const block_q8_0       * y2 = &y[ib + 1];
 
-            alignas(32) int16_t k_vals_1[32];
+            int16_t k_vals_1[32];
             {
                 const uint8_t * q3 = x1->qs;
                 for (int j = 0; j < 8; ++j) {
@@ -885,7 +885,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con
                 }
             }
 
-            alignas(32) int16_t k_vals_2[32];
+            int16_t k_vals_2[32];
             {
                 const uint8_t * q3 = x2->qs;
                 for (int j = 0; j < 8; ++j) {
diff --git a/ggml/src/ggml-cpu/quants.c b/ggml/src/ggml-cpu/quants.c
@@ -240,7 +240,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0_generic(int n, float * GGML_RESTRICT s, size_t
 
     for (; ib < nb; ++ib) {
         const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_E8M0_TO_FP32_HALF(x[ib].e);
-        int sumi1 = 0;
+        int sumi = 0;
         // Q8_0 (y) * MXFP6 (block_size = 32)
         for (int j = 0; j < QK_MXFP6_E3M2/4; ++j) {
             // Current Packed MXFP6
@@ -252,7 +252,6 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0_generic(int n, float * GGML_RESTRICT s, size_t
             const uint8_t b1 = q3[1];
             const uint8_t b2 = q3[2];
 
-            const uint8_t v0_idx = b0 & 0x3F;
             const uint8_t v0_idx = b0 & 0x3F;
             const uint8_t v1_idx = (b0 >> 6) | ((b1 & 0x0F) << 2);
             const uint8_t v2_idx = (b1 >> 4) | ((b2 & 0x03) << 4);
diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu
@@ -506,7 +506,7 @@ static __global__ void dequantize_block_mxfp6_e3m2(const void * __restrict__ vx,
 
         const uint8_t b0 = q3[0];
         const uint8_t b1 = q3[1];
-        const uint8_t b3 = q3[2];
+        const uint8_t b2 = q3[2];
 
         const uint8_t v0_idx = b0 & 0x3F;
         const uint8_t v1_idx = (b0 >> 6) | ((b1 & 0x0F) << 2);
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
@@ -301,7 +301,7 @@ void quantize_row_mxfp6_e3m2_ref(const float * GGML_RESTRICT x, block_mxfp6_e3m2
             }
         }
 
-        const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 9 + 127) : 0;
+        const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 4 + 127) : 0;
 
         const float d = GGML_E8M0_TO_FP32_HALF(e);
 
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
@@ -920,7 +920,7 @@ def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
         with np.errstate(divide="ignore"):
             # convert log2(d_max) to e8m0
             # log2(448) = 8.8 -> shift 9
-            e = np.where(d_max > 0, np.floor(np.log2(d_max)) - 9 + 127, 0).astype(
+            e = np.where(d_max > 0, np.floor(np.log2(d_max)) - 4 + 127, 0).astype(
                 np.uint8
             )
 

Original file line number	Diff line number	Diff line change
`@@ -860,7 +860,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con`
`860`	`860`	`int ib = 0;`
`861`	`861`	`float sumf = 0;`
`862`	`862`
`863`		`- #if defined __AVX2__`
	`863`	`+ #if 0 //defined __AVX2__`
`864`	`864`	`__m256 accum_ps = _mm256_setzero_ps();`
`865`	`865`
`866`	`866`	`for (; ib + 1 < nb; ib += 2) {`
`@@ -870,7 +870,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con`
`870`	`870`	`const block_mxfp6_e3m2 * x2 = &x[ib + 1];`
`871`	`871`	`const block_q8_0 * y2 = &y[ib + 1];`
`872`	`872`
`873`		`- alignas(32) int16_t k_vals_1[32];`
	`873`	`+ int16_t k_vals_1[32];`
`874`	`874`	`{`
`875`	`875`	`const uint8_t * q3 = x1->qs;`
`876`	`876`	`for (int j = 0; j < 8; ++j) {`
`@@ -885,7 +885,7 @@ void ggml_vec_dot_mxfp6_e3m2_q8_0(int n, float * GGML_RESTRICT s, size_t bs, con`
`885`	`885`	`}`
`886`	`886`	`}`
`887`	`887`
`888`		`- alignas(32) int16_t k_vals_2[32];`
	`888`	`+ int16_t k_vals_2[32];`
`889`	`889`	`{`
`890`	`890`	`const uint8_t * q3 = x2->qs;`
`891`	`891`	`for (int j = 0; j < 8; ++j) {`
Original file line number	Diff line number	Diff line change
`@@ -301,7 +301,7 @@ void quantize_row_mxfp6_e3m2_ref(const float * GGML_RESTRICT x, block_mxfp6_e3m2`
`301`	`301`	`}`
`302`	`302`	`}`
`303`	`303`
`304`		`- const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 9 + 127) : 0;`
	`304`	`+ const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 4 + 127) : 0;`
`305`	`305`
`306`	`306`	`const float d = GGML_E8M0_TO_FP32_HALF(e);`
`307`	`307`
Original file line number	Diff line number	Diff line change
`@@ -920,7 +920,7 @@ def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:`
`920`	`920`	`with np.errstate(divide="ignore"):`
`921`	`921`	`# convert log2(d_max) to e8m0`
`922`	`922`	`# log2(448) = 8.8 -> shift 9`
`923`		`- e = np.where(d_max > 0, np.floor(np.log2(d_max)) - 9 + 127, 0).astype(`
	`923`	`+ e = np.where(d_max > 0, np.floor(np.log2(d_max)) - 4 + 127, 0).astype(`
`924`	`924`	`np.uint8`
`925`	`925`	`)`
`926`	`926`