3232from .base import Communication
3333from .deep_ep import DeepEP
3434from .deep_ep_low_latency import DeepEPLowLatency
35- from .mnnvl_latency import MnnvlLatency
36- from .mnnvl_throughput import MNNVLThroughput
35+ from .nvlink_one_sided import NVLinkOneSided
36+ from .nvlink_two_sided import NVLinkTwoSided
3737
3838
3939def is_high_throughput () -> bool :
@@ -72,7 +72,7 @@ class CommunicationFactory:
7272 Factory for creating MoE communication methods
7373
7474 Selects the best communication method based on:
75- - Hardware support (MNNVL , DeepEP)
75+ - Hardware support (NVLINK , DeepEP)
7676 - Configuration settings
7777 - Workload characteristics
7878 """
@@ -85,16 +85,17 @@ def create_strategy(
8585 top_k : int ,
8686 expert_size_per_partition : int ,
8787 payload_in_workspace : bool = False ,
88- alltoall_result_do_sum : bool = False ,
88+ alltoall_result_do_sum : bool = True ,
8989 ) -> Optional [Communication ]:
9090 """
9191 Create the best communication method for the given configuration
9292
9393 Selection priority:
9494 1. Force method (if specified via TRTLLM_FORCE_ALLTOALL_METHOD env)
95- 2. MNNVL (if hardware supports)
95+ 2. NVLINK (if hardware supports)
9696 - Selects latency or throughput backend based on TRTLLM_MOE_ALLTOALL_BACKEND env
97- - Default: "mnnvllatency", alternative: "mnnvlthroughput"
97+ - Default: "NVLinkTwoSided", legacy: "mnnvllatency"
98+ - Alternative: "NVLinkOneSided", legacy: "mnnvlthroughput"
9899 3. DeepEP / DeepEPLowLatency (if enabled and hardware supports)
99100 4. AllGather + ReduceScatter (fallback, always works)
100101
@@ -104,8 +105,8 @@ def create_strategy(
104105 num_slots: Total number of expert slots
105106 top_k: Number of experts per token
106107 expert_size_per_partition: Number of experts per partition (required for DeepEP)
107- payload_in_workspace: If True, final_hidden_states is already in workspace (for MNNVLThroughput )
108- alltoall_result_do_sum: If True, sum the alltoall results (for MnnvlLatency )
108+ payload_in_workspace: If True, final_hidden_states is already in workspace (for NVLinkOneSided )
109+ alltoall_result_do_sum: If True, sum the alltoall results (for NVLinkTwoSided )
109110
110111 Returns:
111112 The selected communication method, or None if attention does not use DP
@@ -139,14 +140,14 @@ def create_strategy(
139140 if force_method is not None :
140141 # Validate platform support for forced method
141142 method_upper = force_method .upper ()
142- if method_upper in ["MNNVLLATENCY " , "MNNVLTHROUGHPUT " ]:
143- if not MnnvlLatency .is_platform_supported ():
143+ if method_upper in ["NVLINK_TWO_SIDED " , "NVLINK_ONE_SIDED " ]:
144+ if not NVLinkTwoSided .is_platform_supported ():
144145 raise RuntimeError (
145146 f"Forced method '{ force_method } ' is not supported on this platform. "
146- "MNNVLLATENCY and MNNVLTHROUGHPUT require compatible hardware."
147+ "NVLINK two-sided and one-sided modes require compatible hardware."
147148 )
148149 elif method_upper in ["DEEPEP" , "DEEPEPLOWLATENCY" ]:
149- if not DeepEP .is_platform_supported (mapping ):
150+ if not DeepEP .is_platform_supported ():
150151 raise RuntimeError (
151152 f"Forced method '{ force_method } ' is not supported on this platform. "
152153 "DeepEP requires compatible hardware and TRTLLM_CAN_USE_DEEP_EP=1."
@@ -163,19 +164,20 @@ def create_strategy(
163164 alltoall_result_do_sum ,
164165 )
165166
166- # Try MNNVL first (highest priority)
167- if MnnvlLatency .is_platform_supported ():
167+ # Try NVLINK first (highest priority)
168+ if NVLinkTwoSided .is_platform_supported ():
169+ # TODO: update when we have a clearer heuristic.
168170 if is_high_throughput ():
169- # Currently, MNNVLThroughput shows better performance at all scenarios
170- return MNNVLThroughput (
171+ # Currently, NVLinkOneSided shows better performance in all scenarios
172+ return NVLinkOneSided (
171173 mapping ,
172- num_experts ,
174+ num_slots ,
173175 top_k ,
174176 max_num_tokens_per_rank = max_num_tokens ,
175177 payload_in_workspace = payload_in_workspace ,
176178 )
177179 else :
178- return MnnvlLatency (
180+ return NVLinkTwoSided (
179181 mapping ,
180182 num_experts ,
181183 num_slots ,
@@ -187,9 +189,7 @@ def create_strategy(
187189 # Try DeepEP
188190 if os .environ .get ("TRTLLM_CAN_USE_DEEP_EP" , "0" ) == "1" :
189191 if weight_dtype == torch .bfloat16 :
190- if DeepEP .is_platform_supported (mapping ) and is_deepep_feasible (
191- mapping .moe_ep_size
192- ):
192+ if DeepEP .is_platform_supported () and is_deepep_feasible (mapping .moe_ep_size ):
193193 return DeepEP (
194194 mapping ,
195195 num_slots ,
@@ -240,21 +240,21 @@ def _create_forced_method(
240240
241241 method = method .upper ()
242242
243- if method == "MNNVLLATENCY" :
244- return MnnvlLatency (
243+ if method in [ "NVLINK_TWO_SIDED" ] :
244+ return NVLinkTwoSided (
245245 mapping ,
246246 num_experts ,
247247 num_slots ,
248248 top_k ,
249249 use_low_precision_combine ,
250250 alltoall_result_do_sum = alltoall_result_do_sum ,
251251 )
252- elif method == "MNNVLTHROUGHPUT" :
253- # MNNVLThroughput requires max_num_tokens_per_rank
252+ elif method in [ "NVLINK_ONE_SIDED" ] :
253+ # NVLinkOneSided requires max_num_tokens_per_rank
254254 # max_num_tokens is per-rank value (as passed from callers like cutlass)
255- return MNNVLThroughput (
255+ return NVLinkOneSided (
256256 mapping ,
257- num_experts ,
257+ num_slots ,
258258 top_k ,
259259 max_num_tokens_per_rank = max_num_tokens ,
260260 payload_in_workspace = payload_in_workspace ,
0 commit comments