Commit b8498bc (1 parent: f68829f)

re-enable custom paged attention for V0 (ROCm#511)

Signed-off-by: charlifu <[email protected]>

File tree: 1 file changed (+2, -1)

vllm/platforms/rocm.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -113,7 +113,8 @@ def use_rocm_custom_paged_attention(qtype: torch.dtype, head_size: int,

     # rocm custom page attention not support on navi (gfx1*)
     return (ON_MI250_MI300 and not ON_NAVI
-            and (sliding_window == 0 or sliding_window == (-1, -1))
+            and (not envs.VLLM_USE_V1 or sliding_window == 0
+                 or sliding_window == (-1, -1))
             and (qtype == torch.half or qtype == torch.bfloat16)
             and (head_size == 64 or head_size == 128)
             and (block_size == 16 or block_size == 32)
```
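The effect of the change can be sketched as a standalone predicate. This is a hedged illustration, not the actual vLLM code: torch dtypes are replaced by plain strings, and the `ON_MI250_MI300`/`ON_NAVI` platform globals and `envs.VLLM_USE_V1` flag become ordinary parameters so the logic can run in isolation.

```python
# Sketch of the updated gate for the ROCm custom paged-attention kernel.
# Stand-ins (hypothetical): qtype is a string instead of torch.dtype;
# use_v1, on_mi250_mi300, on_navi replace module-level flags.
def use_rocm_custom_paged_attention(qtype, head_size, block_size,
                                    sliding_window, use_v1,
                                    on_mi250_mi300=True, on_navi=False):
    # The ROCm custom paged attention kernel is not supported on Navi
    # (gfx1*). After this commit, the sliding-window restriction only
    # applies on the V1 engine: on V0 (not use_v1) the kernel is used
    # regardless of the sliding-window setting.
    return (on_mi250_mi300 and not on_navi
            and (not use_v1 or sliding_window == 0
                 or sliding_window == (-1, -1))
            and qtype in ("half", "bfloat16")
            and head_size in (64, 128)
            and block_size in (16, 32))
```

Before this change, a non-zero sliding window disabled the custom kernel on both engines; the added `not envs.VLLM_USE_V1` clause short-circuits that check on V0, which is what "re-enable custom paged attention for V0" refers to.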
