File tree: 1 file changed, +4 -2 lines changed
Original file line number | Diff line number | Diff line change
@@ -1213,6 +1213,8 @@ kernel void kernel_out_prod_q4_0_impl(
12131213 for (int i0 = tpitg.x ; i0 < args.ne0 ; i0 += ntg.x ) {
12141214 const int ib = i0 / QK4_0;
12151215 const int ix = i0 % QK4_0;
1216+ const int iq = ix % (QK4_0 / 2 );
1217+ const bool upper = ix >= (QK4_0 / 2 );
12161218
12171219 float acc = 0 .0f ;
12181220
@@ -1221,8 +1223,8 @@ kernel void kernel_out_prod_q4_0_impl(
12211223 device const block_q4_0 * src0_row = (device const block_q4_0 *) src0_row_char;
12221224 const block_q4_0 blk = src0_row[ib];
12231225
1224- const uint8_t q = blk.qs [ix / 2 ];
1225- const int nibble = (ix & 1 ) ? (q >> 4 ) : (q & 0x0F );
1226+ const uint8_t q = blk.qs [iq ];
1227+ const int nibble = upper ? (q >> 4 ) : (q & 0x0F );
12261228 const float v0 = ((float ) blk.d ) * ((float ) nibble - 8 .0f );
12271229
12281230 device const src1_t * src1_row = (device const src1_t *)(src1_base + i01*args.nb11 );
You can’t perform that action at this time.
0 commit comments