@@ -437,7 +437,7 @@ vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
437437#if defined(DATA_A_MXFP4)
// MXFP4 dequantize, shown here as a diff hunk: the `-` line is the previous
// return statement and the `+` line is its replacement.
// Reads element `iqs` of `qs` from block `a_offset + ib`, then uses its low
// 4 bits (vui & 0xF) and its remaining upper bits (vui >> 4) as two indices
// into kvalues_mxfp4; the two looked-up values are returned as a vec2.
// The replacement additionally multiplies both looked-up values by 0.5.
// NOTE(review): presumably kvalues_mxfp4 stores doubled magnitudes and the
// 0.5 restores the real ones — confirm against the table definition, and
// verify that no per-block scale elsewhere already applies this halving.
438438vec2 dequantize(uint ib, uint iqs, uint a_offset) {
439439 const uint vui = uint (data_a[a_offset + ib].qs[iqs]);
440- return vec2 (kvalues_mxfp4[vui & 0xF], kvalues_mxfp4[vui >> 4 ]);
440+ return vec2 (kvalues_mxfp4[vui & 0xF], kvalues_mxfp4[vui >> 4 ]) * 0.5 ;
441441}
442442vec4 dequantize4(uint ib, uint iqs, uint a_offset) {
443443 vec2 v0 = dequantize(ib, iqs, a_offset);
@@ -488,9 +488,9 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
488488
489489 const uvec2 qs = uvec2 (data_a[a_offset + ib].qs[qsi], data_a[a_offset + ib].qs[qsi + 1 ]);
490490 const uint scales = data_a[a_offset + ib].scales[scalesi];
491- const vec2 d = vec2 (data_a[a_offset + ib].d );
491+ const vec2 dm = vec2 (data_a[a_offset + ib].dm );
492492
493- return d .x * float (scales & 0xF) * vec2 ((qs >> qsshift) & 3 ) - d .y * float (scales >> 4 );
493+ return dm .x * float (scales & 0xF) * vec2 ((qs >> qsshift) & 3 ) - dm .y * float (scales >> 4 );
494494}
495495vec2 get_dm(uint ib, uint a_offset) {
496496 return vec2 (1 , 0 );
@@ -529,7 +529,7 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
529529 const uint is = 2 * n + b; // 0..7
530530 const uint qsi = n * 32 + (iqs % 16 ) * 2 ; // 0,2,4..126
531531
532- const vec2 loadd = vec2 (data_a[a_offset + ib].d );
532+ const vec2 loadd = vec2 (data_a[a_offset + ib].dm );
533533
534534 const uint scidx0 = (is < 4 ) ? is : (is + 4 );
535535 const uint scidx1 = (is < 4 ) ? is : (is - 4 );
@@ -567,7 +567,7 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
567567
568568 const uint8_t hm = uint8_t(1 << (iqs / 16 ));
569569
570- const vec2 loadd = vec2 (data_a[a_offset + ib].d );
570+ const vec2 loadd = vec2 (data_a[a_offset + ib].dm );
571571
572572 const uint scidx0 = (is < 4 ) ? is : (is + 4 );
573573 const uint scidx1 = (is < 4 ) ? is : (is - 4 );
0 commit comments