Fix Qwen3 MoE accuracy by not passing quant_config to the self.gate ReplicatedLinear layer

Doug Lehr · dllehr-amd · commit f83d4df93c18 · 2025-09-05T17:22:26.000-05:00
diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py
@@ -149,7 +149,7 @@ def __init__(
         self.gate = ReplicatedLinear(config.hidden_size,
                                      config.num_experts,
                                      bias=False,
-                                     quant_config=quant_config,
+                                     quant_config=None,
                                      prefix=f"{prefix}.gate")
 
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: