Skip to content

Commit f45870b

Browse files
fix: allow LFM2 MoE prefix caching (align) (vllm-project#33376)
Signed-off-by: Tianshu Yu <[email protected]>
1 parent ba45bed commit f45870b

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

vllm/model_executor/models/lfm2_moe.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -651,9 +651,11 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None:
651651
quant_config = vllm_config.quant_config
652652
cache_config = vllm_config.cache_config
653653

654-
assert not cache_config.enable_prefix_caching, (
655-
"Lfm2Moe currently does not support prefix caching"
656-
)
654+
if cache_config.mamba_cache_mode == "all":
655+
raise NotImplementedError(
656+
"Lfm2Moe currently does not support 'all' prefix caching, "
657+
"please use '--mamba-cache-mode=align' instead"
658+
)
657659

658660
super().__init__()
659661
self.config = config

vllm/model_executor/models/lfm2_vl.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from vllm.config.multimodal import BaseDummyOptions
2323
from vllm.forward_context import set_forward_context
2424
from vllm.model_executor.layers.mamba.mamba_utils import (
25+
MambaStateCopyFunc,
26+
MambaStateCopyFuncCalculator,
2527
MambaStateDtypeCalculator,
2628
MambaStateShapeCalculator,
2729
)
@@ -584,6 +586,10 @@ def get_mamba_state_shape_from_config(
584586
conv_kernel=hf_language_config.conv_L_cache,
585587
)
586588

589+
@classmethod
590+
def get_mamba_state_copy_func(cls) -> tuple[MambaStateCopyFunc]:
591+
return MambaStateCopyFuncCalculator.short_conv_state_copy_func()
592+
587593
def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"):
588594
super().__init__()
589595
config: Lfm2VlConfig = vllm_config.model_config.hf_config

0 commit comments

Comments (0)