Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions lmdeploy/pytorch/models/deepseek_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,12 @@ def prepare_inputs_for_generation(
position_ids = context.position_ids
attn_metadata = context.attn_metadata

from dlblas.layers.moe.token_dispatcher import DeepEPBuffer, DeepEPMode
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move this to the backend.

class FusedDeepEpMoEBlockedF8Impl(TritonFusedMoEBlockedF8Impl):

You can get the context with the context manager:

def get_step_ctx_manager():

deepep_mode = DeepEPMode.NORMAL
if context.is_decoding:
deepep_mode = DeepEPMode.LOW_LATENCY
DeepEPBuffer.set_deepep_mode(deepep_mode)

return dict(
input_ids=input_ids,
position_ids=position_ids,
Expand Down
Loading