1 file changed: 15 additions, 1 deletion

@@ -403,7 +403,21 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None:
                 compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE
 
         if cache_config and cache_config.block_size is None:
-            cache_config.block_size = 16
+            if (
+                envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER
+                # NOTE: This block has been deprecated
+                # or get_env_variable_attn_backend()
+                # == AttentionBackendEnum.ROCM_AITER_UNIFIED_ATTN
+                # TODO: monitor https://github.com/vllm-project/vllm/pull/30396
+                # to see how we can transition to the new way of selecting
+                # attention backends
+            ):
+                cache_config.block_size = 64
+                logger.warning(
+                    "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
+                )
+            else:
+                cache_config.block_size = 16
 
         if parallel_config.worker_cls == "auto":
            parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker"
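To make the new behavior concrete, below is a minimal, self-contained sketch of the block-size selection this hunk introduces. The function name `pick_block_size` and the SimpleNamespace stand-ins for vLLM's `envs` and `cache_config` objects are hypothetical, for illustration only; only the decision logic mirrors the diff.

# Sketch of the block-size selection added by this hunk (not vLLM's real API).
import logging
from types import SimpleNamespace

logger = logging.getLogger(__name__)

def pick_block_size(envs, cache_config) -> None:
    """Choose the KV cache block size the way the patched code does."""
    if cache_config and cache_config.block_size is None:
        if envs.VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION and envs.VLLM_ROCM_USE_AITER:
            # Both ROCm AITER flags enabled: the diff uses 64-token blocks
            # and logs a warning about the override.
            cache_config.block_size = 64
            logger.warning(
                "[ROCM_AITER_UNIFIED_ATTN]: Setting kv cache block size to 64."
            )
        else:
            # Otherwise fall back to the previous default of 16.
            cache_config.block_size = 16

# Example: with both AITER flags set, block_size becomes 64.
envs = SimpleNamespace(
    VLLM_ROCM_USE_AITER=True,
    VLLM_ROCM_USE_AITER_UNIFIED_ATTENTION=True,
)
cache_config = SimpleNamespace(block_size=None)
pick_block_size(envs, cache_config)
assert cache_config.block_size == 64

Note that the override only applies when `block_size` is still unset; an explicitly configured block size is left untouched.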