InternLM · SHshenhao · Nov 14, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/lmdeploy/pytorch/models/deepseek_v2.py b/lmdeploy/pytorch/models/deepseek_v2.py
@@ -1169,6 +1169,12 @@ def prepare_inputs_for_generation(
         position_ids = context.position_ids
         attn_metadata = context.attn_metadata
 
+        from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
+        deepep_mode = DeepEPMode.NORMAL
+        if context.is_decoding:
+            deepep_mode = DeepEPMode.LOW_LATENCY
+        DeepEPBuffer.set_deepep_mode(deepep_mode)
+
         return dict(
             input_ids=input_ids,
             position_ids=position_ids,