Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions vllm/platforms/rocm.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,11 @@ def get_attn_backend_cls(
return AttentionBackendEnum.ROCM_ATTN.get_path()

if selected_backend == AttentionBackendEnum.ROCM_AITER_FA:
if attn_selector_config.has_sink:
raise ValueError(
f"The selected backend, {selected_backend.name}, "
"does not support sinks."
)
if on_gfx9():
logger.info("Using Aiter Flash Attention backend.")
return AttentionBackendEnum.ROCM_AITER_FA.get_path()
Expand All @@ -345,7 +350,12 @@ def get_attn_backend_cls(

# Priority 2: Check for AITER MHA (Flash Attention)
# Only use if explicitly enabled (not just VLLM_ROCM_USE_AITER=1), running on
# gfx9, and attention sinks are not requested (this backend rejects sinks)
if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA and on_gfx9():
if (
envs.VLLM_ROCM_USE_AITER
and envs.VLLM_ROCM_USE_AITER_MHA
and on_gfx9()
and not attn_selector_config.has_sink
):
logger.info("Using Aiter Flash Attention backend.")
return AttentionBackendEnum.ROCM_AITER_FA.get_path()

Expand All @@ -365,7 +375,8 @@ def get_attn_backend_cls(
if (
envs.VLLM_ROCM_USE_AITER
and on_gfx9()
and envs.VLLM_ROCM_USE_AITER_MHA is not False
and envs.VLLM_ROCM_USE_AITER_MHA
and not attn_selector_config.has_sink
):
logger.info("Using Aiter Flash Attention backend.")
return AttentionBackendEnum.ROCM_AITER_FA.get_path()
Expand Down