Commit 015e149

[https://nvbugs/1234567][fix] Revert https://github.com/NVIDIA/TensorRT-LLM/pull/7768/files (#7813)
Signed-off-by: Tao Li
1 parent: 22c120e

File tree

1 file changed: +2, -8 lines

tensorrt_llm/_torch/models/modeling_llama.py

Lines changed: 2 additions & 8 deletions
```diff
@@ -11,8 +11,7 @@
 from transformers.models.llama4.modeling_llama4 import Llama4MultiModalProjector
 
 from tensorrt_llm._torch.distributed import (AllReduce, AllReduceFusionOp,
-                                             AllReduceParams, AllReduceStrategy,
-                                             MoEAllReduce)
+                                             AllReduceParams, MoEAllReduce)
 from tensorrt_llm._torch.models.checkpoints.base_weight_mapper import \
     BaseWeightMapper
 from tensorrt_llm._utils import get_sm_version
@@ -647,12 +646,7 @@ def __init__(
             eps=config.rms_norm_eps,
             dtype=config.torch_dtype)
 
-        # TODO: This is a temporary fix to disable oneshot kernel for pre-Blackwell arch to avoid perf regressions
-        self.all_reduce = AllReduce(
-            strategy=model_config.allreduce_strategy
-            if get_sm_version() >= 100 else AllReduceStrategy.NCCL,
-            mapping=model_config.mapping,
-        )
+        self.all_reduce = AllReduce(mapping=model_config.mapping)
 
         self.next_layer_layernorm: RMSNorm = None
         self.next_attn: LlamaAttention = None
```
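
For context, here is a minimal sketch of the two construction paths this revert toggles, reconstructed only from the lines visible in the diff above; the helper function names and the bare `model_config` argument are illustrative assumptions, not TensorRT-LLM's actual API surface.

```python
# Minimal sketch of the behavior before and after this revert, based only on
# the code shown in the diff above. The helper names and the shape of
# `model_config` are illustrative assumptions for this example.

from tensorrt_llm._torch.distributed import AllReduce, AllReduceStrategy
from tensorrt_llm._utils import get_sm_version


def build_all_reduce_pre_revert(model_config):
    # PR #7768 forced the NCCL strategy on pre-Blackwell GPUs (SM < 100)
    # to keep the oneshot kernel from causing perf regressions there.
    return AllReduce(
        strategy=model_config.allreduce_strategy
        if get_sm_version() >= 100 else AllReduceStrategy.NCCL,
        mapping=model_config.mapping,
    )


def build_all_reduce_post_revert(model_config):
    # This commit drops the strategy override, so AllReduce is constructed
    # with only the mapping and falls back to its default strategy handling.
    return AllReduce(mapping=model_config.mapping)
```

The import change in the first hunk follows from the second: once the strategy override is removed, `AllReduceStrategy` is no longer referenced in this file, so it is dropped from the import list as well.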
