Commit b42566f

[Bug] Fix is_flashmla_supported Check Error (vllm-project#24774)
Signed-off-by: yewentao256 <[email protected]>
Parent: d96e111

2 files changed: +4, -26 lines

vllm/attention/backends/flashmla.py

Lines changed: 2 additions & 13 deletions
@@ -17,7 +17,6 @@
 from vllm.attention.ops.flashmla import (flash_mla_with_kvcache,
                                          get_mla_metadata,
                                          is_flashmla_supported)
-from vllm.platforms.cuda import CudaPlatform


 class FlashMLABackend(MLACommonBackend):
@@ -179,18 +178,8 @@ def __init__(
                          logits_soft_cap, attn_type,
                          kv_sharing_target_layer_name, **mla_args)

-        assert is_flashmla_supported(), \
-            "FlashMLA is not supported on this device"
-
-        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
-        # context:
-        # https://github.com/deepseek-ai/FlashMLA/issues/83
-        # https://github.com/vllm-project/vllm/issues/24513
-        if CudaPlatform.has_device_capability(100):
-            raise NotImplementedError(
-                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
-                "Please use CUTLASS_MLA or TRITON_MLA instead. "
-                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+        is_supported, reason = is_flashmla_supported()
+        assert is_supported, reason

         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):

vllm/v1/attention/backends/mla/flashmla.py

Lines changed: 2 additions & 13 deletions
@@ -12,7 +12,6 @@
                                          is_flashmla_supported)
 from vllm.config import VllmConfig
 from vllm.logger import init_logger
-from vllm.platforms.cuda import CudaPlatform
 from vllm.v1.attention.backends.mla.common import (MLACommonBackend,
                                                    MLACommonDecodeMetadata,
                                                    MLACommonImpl,
@@ -156,18 +155,8 @@ def __init__(
                          logits_soft_cap, attn_type,
                          kv_sharing_target_layer_name, **mla_args)

-        assert is_flashmla_supported(), \
-            "FlashMLA is not supported on this device"
-
-        # disallow FlashMLA on NVIDIA Blackwell (SM 10.0+) GPUs
-        # context:
-        # https://github.com/deepseek-ai/FlashMLA/issues/83
-        # https://github.com/vllm-project/vllm/issues/24513
-        if CudaPlatform.has_device_capability(100):
-            raise NotImplementedError(
-                "FlashMLA is temporarily disabled on Blackwell (SM 10.0). "
-                "Please use CUTLASS_MLA or TRITON_MLA instead. "
-                "Example: `export VLLM_ATTENTION_BACKEND=CUTLASS_MLA`")
+        is_supported, reason = is_flashmla_supported()
+        assert is_supported, reason

         unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap]
         if any(unsupported_features):

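Note on the bug being fixed: the diff's unpacking (`is_supported, reason = is_flashmla_supported()`) implies the helper returns a (supported, reason) pair rather than a bare boolean. Asserting directly on that return value, as the old code did, always passes in Python because a non-empty tuple is truthy, so the guard was a no-op. The sketch below is illustrative only; the stand-in helper and its reason string are hypothetical, not vLLM's real vllm.attention.ops.flashmla.is_flashmla_supported.

# Illustrative stand-in mimicking the (supported, reason) return shape
# implied by the diff; the real helper inspects the running device.
def is_flashmla_supported() -> tuple[bool, str]:
    # Hypothetical result for a device without FlashMLA support.
    return False, "FlashMLA is not supported on this device"

# Old pattern: the return value is a non-empty tuple, which is always
# truthy, so this assertion can never fail even when support is missing.
assert is_flashmla_supported(), "FlashMLA is not supported on this device"

# New pattern from this commit: unpack the tuple and assert on the flag,
# surfacing the helper's own reason string.
is_supported, reason = is_flashmla_supported()
try:
    assert is_supported, reason
except AssertionError as exc:
    print(f"FlashMLA unavailable: {exc}")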