
Commit 1e799b7

[BugFix] Fix MLA + V1 + TP==1 causing reinitialization of cuda context (#14910)
1 parent 7f6c5ee commit 1e799b7

File tree: 1 file changed (+1, −1)

vllm/platforms/cuda.py

Lines changed: 1 addition & 1 deletion
@@ -152,7 +152,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
                 # here
                 use_flashmla = (envs.VLLM_ATTENTION_BACKEND is None \
                                 or envs.VLLM_ATTENTION_BACKEND == "FLASHMLA")
-                from vllm.attention.backends.flashmla import is_flashmla_supported
+                from vllm.attention.ops.flashmla import is_flashmla_supported
                 if use_flashmla and is_flashmla_supported()[0] \
                         and cache_config.block_size != 64:
                     cache_config.block_size = 64
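
Note on the fix: the only change is the import path of the is_flashmla_supported probe, from vllm.attention.backends.flashmla to vllm.attention.ops.flashmla. The likely mechanism, inferred from the commit title rather than stated on this page, is that importing the backends module inside check_and_update_config pulls in GPU-touching code at import time and so (re)initializes the CUDA context during platform configuration, while the ops module exposes the same probe without that side effect. Below is a minimal, self-contained sketch of the underlying pattern: keep capability probes in import-cheap code so configuration checks stay free of GPU side effects. Apart from the logic copied from the diff, every name here (CacheConfig, the stubbed probe, the __main__ driver) is illustrative, not vLLM's actual API.

# Sketch only (illustrative names, not vLLM's implementation): capability probes
# live in import-cheap code, so configuration checks can call them without
# touching the GPU or creating a CUDA context.
from dataclasses import dataclass
from typing import Optional, Tuple


@dataclass
class CacheConfig:
    block_size: int = 16


def is_flashmla_supported() -> Tuple[bool, str]:
    # Stubbed probe: the real check inspects the GPU, but the property that
    # matters here is that importing and calling it does no CUDA initialization.
    return False, "stub: GPU probing omitted so the sketch runs anywhere"


def check_and_update_config(cache_config: CacheConfig,
                            attention_backend: Optional[str]) -> None:
    # Mirrors the diff's logic: if FlashMLA would be selected and is supported,
    # force the KV-cache block size to 64, which FlashMLA requires.
    use_flashmla = attention_backend is None or attention_backend == "FLASHMLA"
    if use_flashmla and is_flashmla_supported()[0] \
            and cache_config.block_size != 64:
        cache_config.block_size = 64


if __name__ == "__main__":
    cfg = CacheConfig()
    check_and_update_config(cfg, attention_backend=None)
    print(cfg.block_size)  # prints 16: the stub reports FlashMLA as unsupported

In vLLM itself only the import path changes; the surrounding block-size logic is untouched, which is why the diff is a single line.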
