Skip to content

Commit 54f6d57

Browse files
authored
[KERNELS] reduce routing runtime by 5% by using sem="relaxed" (#6866)
1 parent 5b6bf5d commit 54f6d57

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

python/triton_kernels/triton_kernels/reduction_details/reduce_bitmatrix.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def _sum_bitmatrix_rows(B, shape_bm, stride_bm, # input bitmatrix
62 62
bits = tl.load(B + offs_m[None, :] * stride_bm + offs_b[:, None], mask=offs_m[None, :] < shape_bm)
63 63
ret = tl.reshape(vpopc(bits), [BLOCK_N])
64 64
mask = offs_n < shape_pn
65 -
tl.atomic_add(Ret + offs_n, ret, mask=mask)
65 +
tl.atomic_add(Ret + offs_n, ret, mask=mask, sem="relaxed")
66 66
tl.store(Partials + pid_m * stride_pm + offs_n, ret, mask=mask)
67 67

68 68

0 commit comments

Comments
 (0)