Skip to content

Commit a07ff61

Browse files
fsx950223 authored and Doug Lehr committed
Fix env for chunked prefill
Signed-off-by: fsx950223 <[email protected]>
1 parent 2b43479 commit a07ff61

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

vllm/attention/backends/rocm_flash_attn.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -943,7 +943,8 @@ def forward(
943943
use_custom = use_rocm_custom_paged_attention(
944944
decode_query.dtype, head_size, block_size, gqa_ratio,
945945
decode_meta.max_decode_seq_len, self.sliding_window,
946-
self.kv_cache_dtype, self.alibi_slopes)
946+
self.kv_cache_dtype,
947+
self.alibi_slopes) and not is_rocm_aiter_paged_attn_enabled()
947948

948949
if use_custom:
949950
max_seq_len = (decode_meta.max_decode_seq_len if self.attn_type

vllm/platforms/rocm.py

Lines changed: 2 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -150,9 +150,8 @@ def use_rocm_custom_paged_attention(
150150
and (block_size == 16 or block_size == 32)
151151
and (gqa_ratio >= 1 and gqa_ratio <= 16)
152152
and max_seq_len <= 128 * 1024
153-
and (envs.VLLM_ROCM_CUSTOM_PAGED_ATTN)
154-
and not (envs.VLLM_ROCM_USE_AITER_PAGED_ATTN
155-
and envs.VLLM_ROCM_USE_AITER) and sinks is None)
153+
and (envs.VLLM_ROCM_CUSTOM_PAGED_ATTN) and sinks is None)
154+
156155
else:
157156
return (ON_GFX11_GFX12 and (not envs.VLLM_USE_V1 or sliding_window == 0
158157
or sliding_window == (-1, -1))

0 commit comments

Comments
 (0)