
Commit b51255f

[ROCm] Fix broken import in platform attention backend dispatching (vllm-project#30432)
Signed-off-by: Andreas Karatzas <akaratza@amd.com>
1 parent b4054c8 commit b51255f

1 file changed: +15 −1 lines

vllm/platforms/rocm.py

Lines changed: 15 additions & 1 deletion
@@ -403,7 +403,21 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
             compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
 
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            if (
+                envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER
+                # NOTE: This block has been deprecated
+                # or get_env_variable_attn_backend()
+                # == AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN
+                # TODO: monitor https://github.com/vllm-project/vllm/pull/30396
+                # to see how we can transition to the new way of selecting
+                # attention backends
+            ):
+                cache_config.block_size = 64
+                logger.warning(
+                    "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
+                )
+            else:
+                cache_config.block_size = 16
 
         if parallel_config.worker_cls == "auto":
             parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
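The new branch only applies when cache_config.block_size is left unset: with both VLLM_ROCM_USE_AITER and VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION enabled, the KV cache block size defaults to 64 instead of the previous 16. Below is a minimal standalone sketch of that selection logic, not the vLLM implementation: the helpers _env_flag and pick_kv_block_size are hypothetical, and plain os.environ reads stand in for vLLM's envs module.

import os


def _env_flag(name: str) -> bool:
    # Hypothetical helper: treat "1"/"true"/"yes" (case-insensitive) as enabled.
    return os.environ.get(name, "0").strip().lower() in ("1", "true", "yes")


def pick_kv_block_size() -> int:
    # Mirrors the dispatch in the diff above: 64 when the ROCm AITER
    # unified-attention path is enabled, otherwise the prior default of 16.
    use_aiter = _env_flag("VLLM_ROCM_USE_AITER")
    use_aiter_unified = _env_flag("VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION")
    if use_aiter and use_aiter_unified:
        return 64
    return 16


if __name__ == "__main__":
    # e.g. VLLM_ROCM_USE_AITER=1 VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION=1 python sketch.py
    print(pick_kv_block_size())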
