Skip to content

Commit 2cbf514

Browse files
committed
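Fix Q4_0 OUT_PROD nibble selection: index blk.qs with iq = ix % (QK4_0 / 2) instead of ix / 2, and read the upper nibble when ix >= QK4_0 / 2 instead of when ix is odd.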
Signed-off-by: Marcus Edel <[email protected]>
1 parent 808e097 commit 2cbf514

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

ggml/src/ggml-metal/ggml-metal.metal

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1213,6 +1213,8 @@ kernel void kernel_out_prod_q4_0_impl(
12131213
for (int i0 = tpitg.x; i0 < args.ne0; i0 += ntg.x) {
12141214
const int ib = i0 / QK4_0;
12151215
const int ix = i0 % QK4_0;
1216+
const int iq = ix % (QK4_0 / 2);
1217+
const bool upper = ix >= (QK4_0 / 2);
12161218

12171219
float acc = 0.0f;
12181220

@@ -1221,8 +1223,8 @@ kernel void kernel_out_prod_q4_0_impl(
12211223
device const block_q4_0 * src0_row = (device const block_q4_0 *) src0_row_char;
12221224
const block_q4_0 blk = src0_row[ib];
12231225

1224-
const uint8_t q = blk.qs[ix / 2];
1225-
const int nibble = (ix & 1) ? (q >> 4) : (q & 0x0F);
1226+
const uint8_t q = blk.qs[iq];
1227+
const int nibble = upper ? (q >> 4) : (q & 0x0F);
12261228
const float v0 = ((float) blk.d) * ((float) nibble - 8.0f);
12271229

12281230
device const src1_t * src1_row = (device const src1_t *)(src1_base + i01*args.nb11);

0 commit comments

Comments
 (0)