Skip to content

Commit 65ecb4f

Browse files
authored
[Bugfix] Fallback ViT attn backend to SDPA for blackwell (vllm-project#25851)
Signed-off-by: Roger Wang <[email protected]>
1 parent 143844f commit 65ecb4f

File tree

2 files changed

+7
-9
lines changed

2 files changed

+7
-9
lines changed

vllm/model_executor/models/qwen3_vl.py

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@
                                         PromptReplacement, PromptUpdate,
                                         PromptUpdateDetails)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
-from vllm.platforms import _Backend, current_platform
+from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.config import uses_mrope
 from vllm.utils import is_list_of
@@ -336,14 +336,6 @@ def __init__(
         }:
             raise RuntimeError(
                 f"Qwen3-VL does not support {self.attn_backend} backend now.")
-        if current_platform.is_device_capability(
-                100) and self.attn_backend != _Backend.TORCH_SDPA:
-            # TODO(Roger/Wentao): remove this after FA
-            # or XFORMERS's issue fixed on Blackwell
-            logger.info_once("Qwen3-VL vision attention does not support "
-                             f"{self.attn_backend} backend on Blackwell now. "
-                             "Vision attention backend is set to TORCH_SDPA.")
-            self.attn_backend = _Backend.TORCH_SDPA
 
         self.blocks = nn.ModuleList([
             Qwen3_VisionBlock(

vllm/platforms/cuda.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,12 @@ def get_current_memory_usage(cls,
     @classmethod
     def get_vit_attn_backend(cls, head_size: int,
                              dtype: torch.dtype) -> _Backend:
+
+        # For Blackwell GPUs, force TORCH_SDPA for now.
+        # See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579 # noqa: E501
+        if cls.has_device_capability(100):
+            return _Backend.TORCH_SDPA
+
         if dtype not in (torch.float16, torch.bfloat16):
             return _Backend.XFORMERS
 
0 commit comments

Comments
 (0)