Skip to content

Commit b4d3d98

Browse files
Set one_matrix_per_load_for_bt to False
Signed-off-by: Whitney Tsang <[email protected]>
1 parent 4e05a0a commit b4d3d98

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

benchmarks/triton_kernels_benchmark/flash_attention_fwd_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ def _attn_fwd(Q, K, V, sm_scale, M, Out, #
157 157

158 158

159 159
configs = [
160-
triton.Config({'BLOCK_M': BM, 'BLOCK_N': BN, 'grf_mode': 'large', 'one_matrix_per_load_for_bt': True}, num_stages=s, num_warps=w) \
160+
triton.Config({'BLOCK_M': BM, 'BLOCK_N': BN, 'grf_mode': 'large', 'one_matrix_per_load_for_bt': False}, num_stages=s, num_warps=w) \
161 161
for BM in [128, 256] \
162 162
for BN in [32, 64] \
163 163
for s in [3, 4] \

0 commit comments

Comments
 (0)