1 file changed: +7 -6 lines changed
Path: lmdeploy/pytorch/backends/cuda
Columns: Original file line number | Diff line number | Diff line change
@@ -475,9 +475,10 @@ def __init__(self,
475475 self .use_deep_gemm = False
476476 logger .warning ('For higher performance, please install DeepGEMM https://github.com/deepseek-ai/DeepGEMM' )
477477 try :
478- import deep_ep
479- from dlblas .layers .moe .token_dispatcher import DeepEPBuffer , DeepEPMode
480- self .use_deepep = True
478+ from dlblas .layers .moe .token_dispatcher import DeepEPBuffer , DeepEPMode , use_deepep
479+ self .use_deepep = use_deepep
480+ self .deepep_buffer = DeepEPBuffer
481+ self .deepep_mode = DeepEPMode
481482 except ImportError :
482483 self .use_deepep = False
483484 logger .warning ('For higher performance, please install DeepEP https://github.com/deepseek-ai/DeepEP' )
@@ -601,11 +602,11 @@ def _patched_fusedmoe_forward(*args, **kwargs):
601602
602603 def update_dispatch_mode (self ):
603604 if self .use_deepep :
604- deepep_mode = DeepEPMode .NORMAL
605+ deepep_mode_type = self . deepep_mode .NORMAL
605606 step_ctx = get_step_ctx_manager ().current_context ()
606607 if step_ctx .is_decoding :
607- deepep_mode = DeepEPMode .LOW_LATENCY
608- DeepEPBuffer . set_deepep_mode (deepep_mode )
608+ deepep_mode_type = self . deepep_mode .LOW_LATENCY
609+ self . deepep_buffer . set_deepep_mode (deepep_mode_type )
609610
610611
611612class TritonFusedMoEBlockedF8Builder (FusedMoEBlockedF8Builder ):
You can’t perform that action at this time.
0 commit comments