
Commit 6a199dd

Revert "CUDA: fix race conditions FlashAttention kernels (ggml-org#13438)"
This reverts commit 0208355.
1 parent 4721a56 commit 6a199dd


2 files changed, 0 additions and 3 deletions


ggml/src/ggml-cuda/fattn-mma-f16.cuh
Lines changed: 0 additions & 2 deletions

@@ -874,8 +874,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
         }
     }
 
-    __syncthreads();
-
     // Write back combined meta data:
 #pragma unroll
     for (int imeta = 0; imeta < nmeta; ++imeta) {

ggml/src/ggml-cuda/fattn-vec-f16.cuh
Lines changed: 0 additions & 1 deletion

@@ -168,7 +168,6 @@ static __global__ void flash_attn_vec_ext_f16(
     for (int j = 0; j < ncols; ++j) {
         KQ[j*D + tid] = -HALF_MAX_HALF;
     }
-    __syncthreads();
 
     half2 VKQ[ncols] = {{0.0f, 0.0f}};
 
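For context (not part of this commit): __syncthreads() is CUDA's block-wide barrier. The reverted change had inserted these barriers so that every thread in a block finishes writing shared memory (for example, the KQ buffer initialized to -HALF_MAX_HALF above) before any thread reads from it. The standalone sketch below illustrates that general init-then-read pattern; the kernel and buffer names (demo_shared_init, buf, N_ELEMENTS) are hypothetical and not taken from the FlashAttention kernels.

// Minimal sketch of the shared-memory init-then-read pattern a block barrier protects.
// All names here are illustrative, not from ggml.
#include <cstdio>
#include <cuda_runtime.h>

#define N_ELEMENTS 256

__global__ void demo_shared_init(float * out) {
    __shared__ float buf[N_ELEMENTS];
    const int tid = threadIdx.x;

    // Phase 1: each thread initializes one slot of shared memory.
    buf[tid] = -1.0f;

    // Block-wide barrier: without it, a thread could read a slot that
    // another thread has not yet written (the race condition in question).
    __syncthreads();

    // Phase 2: each thread reads a slot written by a different thread.
    out[tid] = buf[(tid + 1) % N_ELEMENTS];
}

int main() {
    float * d_out = nullptr;
    cudaMalloc(&d_out, N_ELEMENTS * sizeof(float));
    demo_shared_init<<<1, N_ELEMENTS>>>(d_out);
    cudaDeviceSynchronize();
    cudaFree(d_out);
    printf("done\n");
    return 0;
}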
