1 parent 36cb5f8 · commit 4c1d9d0
tensorrt_llm/_torch/modules/fused_moe/create_moe.py
@@ -196,6 +196,7 @@ def create_moe_backend(
             swiglu_beta=swiglu_beta,
             swiglu_limit=swiglu_limit,
             init_load_balancer=init_load_balancer,
+            without_comm=without_comm,
             activation_type=activation_type,
         )
     elif moe_cls == WideEPMoE:
@@ -256,6 +257,7 @@ def create_moe_backend(
             weight_loading_mode=weight_loading_mode,
             apply_router_weight_on_input=apply_router_weight_on_input,
             layer_idx=layer_idx,
+            without_comm=without_comm,
         )
     elif moe_cls == TritonFusedMoE:
         assert not apply_router_weight_on_input, "apply_router_weight_on_input is not supported in TritonFusedMoE."
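For context, the sketch below illustrates the pattern this commit follows: a backend factory that forwards a new `without_comm` flag to whichever MoE class it constructs. Only `without_comm`, `create_moe_backend`, `WideEPMoE`, and the other keyword names visible in the diff come from the source; the class bodies, signatures, and the `FusedMoE` stand-in are illustrative assumptions, not TensorRT-LLM's real API.

# Minimal, self-contained sketch of forwarding a new flag through a backend
# factory. Class bodies and signatures are hypothetical stand-ins.

class FusedMoE:
    def __init__(self, *, init_load_balancer=None, without_comm=False,
                 activation_type="swiglu"):
        self.init_load_balancer = init_load_balancer
        self.without_comm = without_comm
        self.activation_type = activation_type


class WideEPMoE:
    def __init__(self, *, layer_idx=0, without_comm=False):
        self.layer_idx = layer_idx
        self.without_comm = without_comm


def create_moe_backend(moe_cls, *, layer_idx=0, init_load_balancer=None,
                       without_comm=False, activation_type="swiglu"):
    # Each branch must forward `without_comm` explicitly; a branch that omits
    # it silently falls back to the constructor default, which is the kind of
    # gap the diff above closes for the two backends it touches.
    if moe_cls is FusedMoE:
        return moe_cls(init_load_balancer=init_load_balancer,
                       without_comm=without_comm,
                       activation_type=activation_type)
    elif moe_cls is WideEPMoE:
        return moe_cls(layer_idx=layer_idx, without_comm=without_comm)
    raise NotImplementedError(f"Unsupported MoE backend: {moe_cls}")


backend = create_moe_backend(WideEPMoE, layer_idx=3, without_comm=True)
assert backend.without_comm and backend.layer_idx == 3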