Skip to content

Commit febfa9e

Browse files
committed
update_dlblas_with_deepep
1 parent f63730d commit febfa9e

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

lmdeploy/pytorch/models/deepseek_v2.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1169,6 +1169,12 @@ def prepare_inputs_for_generation(
1169 1169
position_ids = context.position_ids
1170 1170
attn_metadata = context.attn_metadata
1171 1171

1172+
from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
1173+
deepep_mode = DeepEPMode.NORMAL
1174+
if context.is_decoding:
1175+
deepep_mode = DeepEPMode.LOW_LATENCY
1176+
DeepEPBuffer.set_deepep_mode(deepep_mode)
1177+
1172 1178
return dict(
1173 1179
input_ids=input_ids,
1174 1180
position_ids=position_ids,

0 commit comments

Comments
 (0)