1 parent 7f6c5ee · commit 1e799b7
vllm/platforms/cuda.py
@@ -152,7 +152,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
  # here
  use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
      or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
- from vllm.attention.backends.flashmla import is_flashmla_supported
+ from vllm.attention.ops.flashmla import is_flashmla_supported
  if use_flashmla and is_flashmla_supported()[0] \
          and cache_config.block_size != 64:
      cache_config.block_size = 64
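
For context, the surrounding logic selects FlashMLA when `VLLM_ATTENTION_BACKEND` is unset or set to "FLASHMLA", and forces the KV-cache block size to 64 when FlashMLA is actually usable; this commit only moves the `is_flashmla_supported` import to `vllm.attention.ops.flashmla`. Below is a minimal, self-contained sketch of that pattern: the `SimpleCacheConfig` dataclass and the fallback stub are hypothetical placeholders so the sketch runs without vLLM installed, not vLLM's real classes.

```python
# Sketch of the block-size adjustment pattern shown in the diff above.
# SimpleCacheConfig and the ImportError fallback are illustrative stand-ins;
# only the import path vllm.attention.ops.flashmla comes from this commit.
import os
from dataclasses import dataclass

try:
    # New import location introduced by this commit.
    from vllm.attention.ops.flashmla import is_flashmla_supported
except ImportError:
    # Fallback stub (assumption) so the sketch works without vLLM/FlashMLA.
    def is_flashmla_supported():
        return (False, "vLLM with FlashMLA support is not installed")


@dataclass
class SimpleCacheConfig:
    block_size: int = 16


def maybe_force_flashmla_block_size(cache_config: SimpleCacheConfig) -> None:
    backend = os.environ.get("VLLM_ATTENTION_BACKEND")
    use_flashmla = backend is None or backend == "FLASHMLA"
    # The diff forces block_size to 64 whenever FlashMLA is selected and supported.
    if use_flashmla and is_flashmla_supported()[0] \
            and cache_config.block_size != 64:
        cache_config.block_size = 64


cfg = SimpleCacheConfig()
maybe_force_flashmla_block_size(cfg)
print(cfg.block_size)
```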