Commit 612cfff

Merge branch 'main' into ampere_xqa_swa_1013
Signed-off-by: Jhao-Ting Chen <jhaotingc@nvidia.com>
2 parents: 5b8c434 + 1c7b7cd

168 files changed: +6565 −1621 lines changed

ATTRIBUTIONS-Python.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -25486,7 +25486,7 @@ limitations under the License.
 ```
 
 ### URLs
-- `Homepage`: https://github.com/NVIDIA/TensorRT-Model-Optimizer
+- `Homepage`: https://github.com/NVIDIA/Model-Optimizer
 
 
 ## nvidia-modelopt-core (0.33.1)
@@ -25513,7 +25513,7 @@ limitations under the License.
 ```
 
 ### URLs
-- `Homepage`: https://github.com/NVIDIA/TensorRT-Model-Optimizer
+- `Homepage`: https://github.com/NVIDIA/Model-Optimizer
 
 
 ## nvidia-nccl-cu12 (2.27.3)
````

README.md

Lines changed: 2 additions & 2 deletions

````diff
@@ -164,7 +164,7 @@ state-of-the-art optimizations to perform inference efficiently on NVIDIA GPUs.<
 [➡️ link](https://www.bentoml.com/blog/tuning-tensor-rt-llm-for-optimal-serving-with-bentoml)
 
 
-* [2024/08/20] 🏎️SDXL with #TensorRT Model Optimizer ⏱️⚡ 🏁 cache diffusion 🏁 quantization aware training 🏁 QLoRA 🏁 #Python 3.12
+* [2024/08/20] 🏎️SDXL with #Model Optimizer ⏱️⚡ 🏁 cache diffusion 🏁 quantization aware training 🏁 QLoRA 🏁 #Python 3.12
 [➡️ link](https://developer.nvidia.com/blog/nvidia-tensorrt-model-optimizer-v0-15-boosts-inference-performance-and-expands-model-support/)
 
 * [2024/08/13] 🐍 DIY Code Completion with #Mamba ⚡ #TensorRT #LLM for speed 🤖 NIM for ease ☁️ deploy anywhere
@@ -209,7 +209,7 @@ Technical Deep Dive for serious coders ✅+99% compression ✅1 set of weights
 * [2024/05/21] ✨@modal_labs has the codes for serverless @AIatMeta Llama 3 on #TensorRT #LLM ✨👀 📚 Marvelous Modal Manual:
 Serverless TensorRT LLM (LLaMA 3 8B) | Modal Docs [➡️ link](https://modal.com/docs/examples/trtllm_llama)
 
-* [2024/05/08] NVIDIA TensorRT Model Optimizer -- the newest member of the #TensorRT ecosystem is a library of post-training and training-in-the-loop model optimization techniques ✅quantization ✅sparsity ✅QAT [➡️ blog](https://developer.nvidia.com/blog/accelerate-generative-ai-inference-performance-with-nvidia-tensorrt-model-optimizer-now-publicly-available/)
+* [2024/05/08] NVIDIA Model Optimizer -- the newest member of the #TensorRT ecosystem is a library of post-training and training-in-the-loop model optimization techniques ✅quantization ✅sparsity ✅QAT [➡️ blog](https://developer.nvidia.com/blog/accelerate-generative-ai-inference-performance-with-nvidia-tensorrt-model-optimizer-now-publicly-available/)
 
 * [2024/05/07] 🦙🦙🦙 24,000 tokens per second 🛫Meta Llama 3 takes off with #TensorRT #LLM 📚[➡️ link](https://blogs.nvidia.com/blog/meta-llama3-inference-acceleration/)
 
````

cpp/tensorrt_llm/common/customAllReduceUtils.h

Lines changed: 2 additions & 3 deletions

````diff
@@ -81,7 +81,6 @@ inline AllReduceStrategyType SelectStrategyLP(size_t seq_len, size_t hidden_size
     {
         return AllReduceStrategyType::ONESHOT;
     }
-    return AllReduceStrategyType::NCCL;
 }
 
 // use 1D vector to store the best strategy instead of a map for each sm version
@@ -143,15 +142,15 @@ inline AllReduceStrategyType selectStrategyLookUpTable(
         sm_version = 100;
     }
 
-    // Check if the entry is out of bounds, otherwise return NCCL as fallback
+    // Check if the entry is out of bounds, otherwise return NCCL_SYMMETRIC as fallback
     if (AllReduceBestStrategyTable.find(sm_version) == AllReduceBestStrategyTable.end()
         || tp_index >= AllReduceBestStrategyTable.at(sm_version).size()
         || fusion_op_index >= AllReduceBestStrategyTable.at(sm_version).at(tp_index).size()
        || hidden_size_index >= AllReduceBestStrategyTable.at(sm_version).at(tp_index).at(fusion_op_index).size()
        || num_token_index
            >= AllReduceBestStrategyTable.at(sm_version).at(tp_index).at(fusion_op_index).at(hidden_size_index).size())
    {
-        return AllReduceStrategyType::NCCL;
+        return AllReduceStrategyType::NCCL_SYMMETRIC;
    }
 
    return static_cast<AllReduceStrategyType>(
````
