Skip to content

Commit 6a7c734

Browse files
committed
update_dlblas_deepep
1 parent 6cc9e6f commit 6a7c734

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

lmdeploy/pytorch/backends/cuda/moe.py

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -475,9 +475,10 @@ def __init__(self,
475475
self.use_deep_gemm = False
476476
logger.warning('For higher performance, please install DeepGEMM https://github.com/deepseek-ai/DeepGEMM')
477477
try:
478-
import deep_ep
479-
from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
480-
self.use_deepep = True
478+
from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode, use_deepep
479+
self.use_deepep = use_deepep
480+
self.deepep_buffer = DeepEPBuffer
481+
self.deepep_mode = DeepEPMode
481482
except ImportError:
482483
self.use_deepep = False
483484
logger.warning('For higher performance, please install DeepEP https://github.com/deepseek-ai/DeepEP')
@@ -601,11 +602,11 @@ def _patched_fusedmoe_forward(*args, **kwargs):
601602

602603
def update_dispatch_mode(self):
603604
if self.use_deepep:
604-
deepep_mode = DeepEPMode.NORMAL
605+
deepep_mode_type = self.deepep_mode.NORMAL
605606
step_ctx = get_step_ctx_manager().current_context()
606607
if step_ctx.is_decoding:
607-
deepep_mode = DeepEPMode.LOW_LATENCY
608-
DeepEPBuffer.set_deepep_mode(deepep_mode)
608+
deepep_mode_type = self.deepep_mode.LOW_LATENCY
609+
self.deepep_buffer.set_deepep_mode(deepep_mode_type)
609610

610611

611612
class TritonFusedMoEBlockedF8Builder(FusedMoEBlockedF8Builder):

lmdeploy/pytorch/models/deepseek_v2.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1160,7 +1160,7 @@ def get_input_embeddings(self):
11601160

11611161
def _update_dispatch_mode(self):
11621162
if isinstance(self.model.layers[0].mlp, DeepseekV2MoE):
1163-
if hasattr(self.model.layers[0].mlp.experts.impl, "update_dispatch_mode"):
1163+
if hasattr(self.model.layers[0].mlp.experts.impl, 'update_dispatch_mode'):
11641164
self.model.layers[0].mlp.experts.impl.update_dispatch_mode()
11651165

11661166
def prepare_inputs_for_generation(

0 commit comments

Comments
 (0)