Commit 24e53d2

adjust best config for attn (#2516)
Parent: b6cdccd

File tree

1 file changed

benchmarks/triton_kernels_benchmark/flash_attention_fwd_benchmark.py

Lines changed: 3 additions & 2 deletions
@@ -171,7 +171,7 @@ def forward(q, k, v, causal, sm_scale):
     assert Lk in {16, 32, 64, 128}
     o = torch.empty_like(q, dtype=torch.float32)
     BLOCK_M = 128
-    BLOCK_N = 64 if Lk <= 64 else 32
+    BLOCK_N = 64
     num_stages = 3
     num_warps = 8 if Lq == 64 else 16
     stage = 3 if causal else 1
@@ -205,7 +205,8 @@ def forward(q, k, v, causal, sm_scale):
         BLOCK_DMODEL=Lk,  #
         STAGE=stage,  #
         num_warps=num_warps,  #
-        num_stages=num_stages  #
+        num_stages=num_stages,  #
+        grf_mode='large',  #
     )
     return o
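
For readers unfamiliar with these launch options: num_warps and num_stages are standard Triton launch-time keyword arguments, while grf_mode appears to be specific to the Intel XPU backend for Triton (this repository), where 'large' requests the large general-register-file mode for the kernel, trading occupancy for more registers per work-item. Below is a minimal, self-contained sketch of how such options are passed at a kernel launch. The trivial copy kernel and every name in it are illustrative assumptions, not code from this repository, and grf_mode='large' will only be honored when running on the Intel XPU backend.

import torch
import triton
import triton.language as tl


@triton.jit
def _copy_kernel(src_ptr, dst_ptr, n_elements, BLOCK: tl.constexpr):
    # Trivial elementwise copy; the kernel body is irrelevant here,
    # the point is the set of launch-time options passed below.
    pid = tl.program_id(axis=0)
    offsets = pid * BLOCK + tl.arange(0, BLOCK)
    mask = offsets < n_elements
    vals = tl.load(src_ptr + offsets, mask=mask)
    tl.store(dst_ptr + offsets, vals, mask=mask)


def copy(x):
    out = torch.empty_like(x)
    n = x.numel()
    grid = (triton.cdiv(n, 1024), )
    _copy_kernel[grid](
        x, out, n,  #
        BLOCK=1024,  #
        num_warps=8,  # standard Triton launch option
        num_stages=3,  # standard Triton launch option
        grf_mode='large',  # Intel XPU backend option, as added by this commit
    )
    return out

The trailing '#' on each keyword line mirrors the style visible in the patched file, where it is commonly used to keep one argument per line under autoformatters.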
