Skip to content

Commit db3ded2

Browse files
committed
gla: Put the barrier inside the main logic loop
1 parent 81d8529 commit db3ded2

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

ggml/src/ggml-sycl/gla.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -30,9 +30,11 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B,
3030
for (u_int i = 0; i < head_size; i++) {
3131
state[i] = s[batch_i * state_size + head_i * head_size * head_size + i * head_size + tid];
3232
}
33-
item.barrier(sycl::access::fence_space::local_space); //sync threads
33+
3434
for (u_int t = batch_i * n_seq_tokens * C + head_i * head_size + tid;
3535
t < (batch_i + 1) * n_seq_tokens * C + head_i * head_size + tid; t += C) {
36+
37+
item.barrier(sycl::access::fence_space::local_space); //sync threads
3638
_k[tid] = k[t];
3739
_r[tid] = r[t];
3840
_td[tid] = td[t];

0 commit comments

Comments
 (0)