activate NCCL_SYMMETRIC auto-tuning

nv-lschneider · nv-lschneider · commit 35ba03b501fa · 2026-01-16T10:14:28.000-06:00
Signed-off-by: Ludwig Schneider <lschneider@nvidia.com>
diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -1695,8 +1695,7 @@ def get_valid_tactics(
         **kwargs,
     ) -> List[int]:
         valid_strategies = [
-            # TODO: NCCL_SYMMETRIC will cause hang during tuning process
-            # AllReduceStrategy.NCCL_SYMMETRIC.value,
+            AllReduceStrategy.NCCL_SYMMETRIC.value,
             AllReduceStrategy.NCCL.value,
         ]
         # Fallback in allreduceOp is set to NCCL_SYMMETRIC as default
@@ -1725,7 +1724,7 @@ def forward(
         input, residual, norm_weight, scale, bias, workspace = inputs
         if tactic == -1:
-            # TODO: Use NCCL instead of NCCL_SYMMETRIC to avoid hanging during tuning process
-            tactic = AllReduceStrategy.NCCL.value
+            # No tactic selected (-1): fall back to NCCL_SYMMETRIC.
+            tactic = AllReduceStrategy.NCCL_SYMMETRIC.value
 
         return torch.ops.trtllm.allreduce(
             input,