File tree: 2 files changed, +4 -4 lines changed
models/deepseek2/layer_weights (2 files changed, +4 -4 lines changed)
Columns: original file line number | diff line number | diff line change
@@ -40,8 +40,8 @@ def _parse_config(self):
4040 self .num_attention_heads = self .network_config_ ["num_attention_heads" ]
4141 self .kv_lora_rank = self .network_config_ ["kv_lora_rank" ]
4242 self .num_fused_shared_experts = 0
43- if get_env_start_args ().enable_fused_shared_experts and self .is_moe :
44- # MOE_MODE 处于 TP 模式下才能使能 enable_fused_shared_experts
43+ if not get_env_start_args ().disable_fused_shared_experts and self .is_moe :
44+ # MOE_MODE 处于 TP 模式下才能使能 fused_shared_experts
4545 moe_mode = os .getenv ("MOE_MODE" , "TP" )
4646 assert moe_mode == "TP"
4747 self .num_fused_shared_experts = self .network_config_ .get ("n_shared_experts" , 0 )
Columns: original file line number | diff line number | diff line change
@@ -466,9 +466,9 @@ def make_argument_parser() -> argparse.ArgumentParser:
466466 help = """Whether to update the redundant expert for deepseekv3 model by online expert used counter.""" ,
467467 )
468468 parser .add_argument (
469- "--enable_fused_shared_experts " ,
469+ "--disable_fused_shared_experts " ,
470470 action = "store_true" ,
471- help = """Whether to enable fused shared experts for deepseekv3 model. only work when MOE_MODE=TP """ ,
471+ help = """Whether to disable fused shared experts for deepseekv3 model. only work when MOE_MODE=TP """ ,
472472 )
473473 parser .add_argument (
474474 "--mtp_mode" ,
You can’t perform that action at this time.
0 commit comments