Skip to content

Commit 426cc86

Browse files
authored
[BugFix] Fix routed_scaling_factor double mul for dots1 and glm4 MoE models (vllm-project#24132)
Signed-off-by: Yong Hoon Shin <[email protected]>
1 parent e81d4e6 commit 426cc86

File tree

2 files changed

+4
-2
lines changed

2 files changed

+4
-2
lines changed

vllm/model_executor/models/dots1.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,8 @@ def __init__(
137 137   topk_group=config.topk_group,
138 138   prefix=f"{prefix}.experts",
139 139   scoring_func=config.scoring_func,
140     - routed_scaling_factor=self.routed_scaling_factor,
    140 + # we do scaling outside, set factor to 1.0 to avoid double mul
    141 + routed_scaling_factor=1.0,
141 142   e_score_correction_bias=self.gate.e_score_correction_bias)
142 143
143 144   if config.n_shared_experts is not None:

vllm/model_executor/models/glm4_moe.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -159,7 +159,8 @@ def __init__(
159 159   topk_group=config.topk_group,
160 160   prefix=f"{prefix}.experts",
161 161   scoring_func="sigmoid",
162     - routed_scaling_factor=self.routed_scaling_factor,
    162 + # we do scaling outside, set factor to 1.0 to avoid double mul
    163 + routed_scaling_factor=1.0,
163 164   e_score_correction_bias=self.gate.e_score_correction_bias,
164 165   enable_eplb=self.enable_eplb,
165 166   num_redundant_experts=self.n_redundant_experts)

0 commit comments

Comments
 (0)