From 17b443acfc577a5fa62925c21440f729472f2c21 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider <lschneider@nvidia.com>
Date: Tue, 6 Jan 2026 09:45:25 -0800
Subject: [PATCH 1/3] activate NCCL_SYMMETRIC auto-tuning

Signed-off-by: Ludwig Schneider <lschneider@nvidia.com>
---
 tensorrt_llm/_torch/custom_ops/torch_custom_ops.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
index e3b725b3930..0465ccc7806 100644
--- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -1690,8 +1690,7 @@ def get_valid_tactics(
         **kwargs,
     ) -> List[int]:
         valid_strategies = [
-            # TODO: NCCL_SYMMETRIC will cause hang during tuning process
-            # AllReduceStrategy.NCCL_SYMMETRIC.value,
+            AllReduceStrategy.NCCL_SYMMETRIC.value,
             AllReduceStrategy.NCCL.value,
         ]
         # Fallback in allreduceOp is set to NCCL_SYMMETRIC as default
@@ -1720,7 +1719,7 @@ def forward(
         input, residual, norm_weight, scale, bias, workspace = inputs
         if tactic == -1:
             # TODO: Use NCCL instead of NCCL_SYMMETRIC to avoid hanging during tuning process
-            tactic = AllReduceStrategy.NCCL.value
+            tactic = AllReduceStrategy.NCCL_SYMMETRIC.value
 
         return torch.ops.trtllm.allreduce(
             input,

From 411d6e1de68f12218b361d966395cf36c16969f7 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider <lschneider@nvidia.com>
Date: Wed, 7 Jan 2026 12:44:01 -0600
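Subject: [PATCH 2/3] unwaive test, a previous PR may have resolved hang

Also reverts the NCCL_SYMMETRIC auto-tuning change from patch 1/3:
NCCL_SYMMETRIC is commented out of the valid tactics again and the
default tactic in forward() goes back to NCCL.

Signed-off-by: Ludwig Schneider <lschneider@nvidia.com>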
---
 tensorrt_llm/_torch/custom_ops/torch_custom_ops.py | 5 +++--
 tests/integration/test_lists/waives.txt            | 1 -
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
index 0465ccc7806..e3b725b3930 100644
--- a/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
+++ b/tensorrt_llm/_torch/custom_ops/torch_custom_ops.py
@@ -1690,7 +1690,8 @@ def get_valid_tactics(
         **kwargs,
     ) -> List[int]:
         valid_strategies = [
-            AllReduceStrategy.NCCL_SYMMETRIC.value,
+            # TODO: NCCL_SYMMETRIC will cause hang during tuning process
+            # AllReduceStrategy.NCCL_SYMMETRIC.value,
             AllReduceStrategy.NCCL.value,
         ]
         # Fallback in allreduceOp is set to NCCL_SYMMETRIC as default
@@ -1719,7 +1720,7 @@ def forward(
         input, residual, norm_weight, scale, bias, workspace = inputs
         if tactic == -1:
             # TODO: Use NCCL instead of NCCL_SYMMETRIC to avoid hanging during tuning process
-            tactic = AllReduceStrategy.NCCL_SYMMETRIC.value
+            tactic = AllReduceStrategy.NCCL.value
 
         return torch.ops.trtllm.allreduce(
             input,
diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 5000daf6338..26264603842 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -269,7 +269,6 @@ full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_
 full:sm89/accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_tp_pp_symmetric[MMLU-tp2pp2] SKIP (https://nvbugs/5596337)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] SKIP (https://nvbugs/5721672)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[tp4-fp8kv=True-attn_backend=FLASHINFER-torch_compile=True] SKIP (https://nvbugs/5741304)
-unittest/_torch/multi_gpu/test_allreduce.py::test_allreduce_fusion_patterns[2-residual_rms_norm_out_quant_fp8-hidden:7168-seqlen:8192] SKIP (https://nvbugs/5741392)
 unittest/executor/test_rpc.py::TestRpcCorrectness::test_incremental_task_async SKIP (https://nvbugs/5741476)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_4gpus[pp4-fp8kv=True-attn_backend=TRTLLM-torch_compile=False] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] SKIP (https://nvbugs/5740377)

From e1c1947155626de15b1b15cfef79b1810c93615f Mon Sep 17 00:00:00 2001
From: Ludwig Schneider <lschneider@nvidia.com>
Date: Mon, 12 Jan 2026 14:38:02 -0600
Subject: [PATCH 3/3] unwaive other failing test as well (same suspected root
 cause)

Signed-off-by: Ludwig Schneider <lschneider@nvidia.com>
---
 tests/integration/test_lists/waives.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt
index 26264603842..86aa6eefba8 100644
--- a/tests/integration/test_lists/waives.txt
+++ b/tests/integration/test_lists/waives.txt
@@ -295,7 +295,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5759338)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False] SKIP (https://nvbugs/5759338)
 test_e2e.py::test_ptp_quickstart_advanced_2gpus_sm120[Nemotron-Super-49B-v1-BF16-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1] SKIP (https://nvbugs/5670469)
-unittest/_torch/multi_gpu/test_mnnvl_allreduce.py::test_row_linear_residual_norm_fusion[no_fusion-strategy:8-dtype:bfloat16-hidden:8192-seqlen:[15]] SKIP (https://nvbugs/5761364)
 triton_server/test_triton.py::test_gpt_speculative_decoding[gpt-speculative-decoding] SKIP (https://nvbugs/5762854)
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B_Instruct_RocketKV::test_auto_dtype SKIP (https://nvbugs/5762822)
 unittest/_torch/sampler/test_return_logits.py SKIP (https://nvbugs/5764627)