Skip to content

Commit 6521ee2

Browse files
ilmlclaude
and committed
fix: add missing dense_output parameter to topk_routing_with_score_function
The router.py passes dense_output=True for inference mode but the merge took dev's version of moe_utils.py which lacks this parameter. Added from main to fix TypeError in InferenceTopKRouter. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent fc2d334 commit 6521ee2

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

megatron/core/transformer/moe/moe_utils.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -662,6 +662,7 @@ def topk_routing_with_score_function(
662662
expert_bias: Optional[torch.Tensor] = None,
663663
fused: bool = False,
664664
router_replay: Optional['RouterReplay'] = None,
665+
dense_output: bool = False,
665666
) -> Tuple[torch.Tensor, torch.Tensor]:
666667
"""Compute the routing probabilities and map for top-k selection with score function.
667668
@@ -774,6 +775,9 @@ def compute_topk(scores, topk, num_groups=None, group_topk=None):
774775
if scaling_factor:
775776
probs = probs * scaling_factor
776777

778+
if dense_output:
779+
return probs, top_indices
780+
777781
if torch.are_deterministic_algorithms_enabled():
778782
# build [num_tokens, num_experts] from [num_tokens, topk]
779783
routing_probs = torch.zeros_like(logits)

0 commit comments

Comments
 (0)