Use fp32 for GLM-4.5 e_score_correction_bias (#22143)

zRzRzRzRzRzRzR · web-flow · commit d3c18c9cb0b6 · 2025-08-03T09:04:54.000-07:00
Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com>
diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py
@@ -125,9 +125,8 @@ def __init__(
                                      quant_config=None,
                                      prefix=f"{prefix}.gate")
 
-        # noaux_tc is not set in transformers new config now
-        self.gate.e_score_correction_bias = (nn.Parameter(
-            torch.empty(config.n_routed_experts)))
+        self.gate.e_score_correction_bias = nn.Parameter(
+            torch.empty(config.n_routed_experts, dtype=torch.float32))
 
         # Load balancing settings.
         vllm_config = get_current_vllm_config()