2 files changed, +7 −9 lines changed

File 1 (Qwen3-VL vision model):
```diff
@@ -66,7 +66,7 @@
                                         PromptReplacement, PromptUpdate,
                                         PromptUpdateDetails)
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
-from vllm.platforms import _Backend, current_platform
+from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
 from vllm.transformers_utils.config import uses_mrope
 from vllm.utils import is_list_of
@@ -336,14 +336,6 @@ def __init__(
         }:
             raise RuntimeError(
                 f"Qwen3-VL does not support {self.attn_backend} backend now.")
-        if current_platform.is_device_capability(
-                100) and self.attn_backend != _Backend.TORCH_SDPA:
-            # TODO(Roger/Wentao): remove this after FA
-            # or XFORMERS's issue fixed on Blackwell
-            logger.info_once("Qwen3-VL vision attention does not support "
-                             f"{self.attn_backend} backend on Blackwell now. "
-                             "Vision attention backend is set to TORCH_SDPA.")
-            self.attn_backend = _Backend.TORCH_SDPA

         self.blocks = nn.ModuleList([
             Qwen3_VisionBlock(
```
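Net effect in the model file: the Blackwell-specific fallback to TORCH_SDPA is deleted from Qwen3-VL's `__init__` (along with the now-unused `current_platform` import), since the platform layer below takes over that decision. One nuance worth noting: the removed code gated on `is_device_capability(100)`, while the replacement gates on `has_device_capability(100)`; assuming these helpers keep vLLM's usual exact-match vs. at-least semantics, the rule widens from "exactly compute capability 10.0" to "10.0 or newer". A toy sketch of that distinction (both functions here are illustrative stand-ins, not the vLLM API):

```python
# Toy predicates showing the assumed difference between the two
# capability checks involved in this change.

def is_device_capability(device: int, wanted: int) -> bool:
    """Exact match, as the removed Qwen3-VL check used."""
    return device == wanted


def has_device_capability(device: int, wanted: int) -> bool:
    """At-least match, as the new platform-level check uses."""
    return device >= wanted


# On a hypothetical capability-103 Blackwell variant, only the
# at-least form would still trigger the TORCH_SDPA fallback:
assert not is_device_capability(103, 100)
assert has_device_capability(103, 100)
```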
File 2 (platform backend selection):

```diff
@@ -205,6 +205,12 @@ def get_current_memory_usage(cls,
     @classmethod
     def get_vit_attn_backend(cls, head_size: int,
                              dtype: torch.dtype) -> _Backend:
+
+        # For Blackwell GPUs, force TORCH_SDPA for now.
+        # See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579  # noqa: E501
+        if cls.has_device_capability(100):
+            return _Backend.TORCH_SDPA
+
         if dtype not in (torch.float16, torch.bfloat16):
             return _Backend.XFORMERS
```
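Taken together, the platform hunk means any vision model that asks the platform for a ViT attention backend now gets TORCH_SDPA on Blackwell before the dtype-based selection runs. A self-contained sketch of that control flow, using a toy class and enum rather than vLLM's real platform class and `_Backend`:

```python
from enum import Enum, auto

import torch


class Backend(Enum):
    """Toy stand-in for vllm.platforms._Backend."""
    FLASH_ATTN = auto()
    XFORMERS = auto()
    TORCH_SDPA = auto()


class ToyPlatform:
    """Toy stand-in for the platform class this hunk patches."""

    # Pretend we are on Blackwell: compute capability 10.0 == 100.
    device_capability = 100

    @classmethod
    def has_device_capability(cls, capability: int) -> bool:
        # At-least semantics, matching how the new check is assumed
        # to behave.
        return cls.device_capability >= capability

    @classmethod
    def get_vit_attn_backend(cls, head_size: int,
                             dtype: torch.dtype) -> Backend:
        # New early return from this PR: Blackwell always gets
        # TORCH_SDPA, regardless of head size or dtype.
        if cls.has_device_capability(100):
            return Backend.TORCH_SDPA
        # Pre-existing fallback: non-half dtypes go to XFORMERS.
        if dtype not in (torch.float16, torch.bfloat16):
            return Backend.XFORMERS
        # Remaining selection logic is elided in the diff.
        return Backend.FLASH_ATTN


# On Blackwell the dtype no longer matters:
assert ToyPlatform.get_vit_attn_backend(64, torch.float32) \
    is Backend.TORCH_SDPA
```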