Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions ggml/src/ggml-metal.metal
Original file line number Diff line number Diff line change
Expand Up @@ -2776,11 +2776,11 @@ kernel void kernel_flash_attn_ext_vec_f16(
const short iv3 = iq3 / rv3;

// load the queries from shared memory into local memory
-float4 mq[D4];
+float4 mq[D4/NW];

for (short ii = 0; ii < D4; ii += NW) {
short i = ii + tiisg;
-mq[i] = (float4) sq4[i];
+mq[i/NW] = (float4) sq4[i];
}

// pointer to the mask
Expand Down Expand Up @@ -2812,7 +2812,7 @@ kernel void kernel_flash_attn_ext_vec_f16(
mk[2] = (float4) pk4[i + 2*(nb11/8)];
mk[3] = (float4) pk4[i + 3*(nb11/8)];

-mqk += (float4) (mq[i] * mk);
+mqk += (float4) (mq[i/NW] * mk);
}

// reduce the results from the threads in the simdgroup
Expand Down
Loading