
Commit ef1d4a4

[https://nvbugs/5727475][fix] Avoid using a property with a setter in nn.Module (#10212)
Signed-off-by: Jin Li <[email protected]>
1 parent d944430 · commit ef1d4a4

File tree: 2 files changed (+9, -53 lines)


tensorrt_llm/_torch/modules/fused_moe/configurable_moe.py

Lines changed: 9 additions & 52 deletions
@@ -147,7 +147,7 @@ def __init__(
         model_config.skip_create_weights_in_init = True
         model_config._frozen = True

-        self.backend = create_moe_backend(
+        backend = create_moe_backend(
             moe_cls=moe_cls,
             routing_method=routing_method,
             num_experts=self.num_experts,
@@ -168,15 +168,14 @@ def __init__(
             without_comm=True,
         )

+        self.validate_backend(backend)
+        self.backend = backend
+
         # Sync critical attributes from ConfigurableMoE to backend
         # ConfigurableMoE's super().__init__() was called with real layer_idx and initialized load balancer.
         # Backend was created with init_load_balancer=False and without_comm=True to avoid
         # duplicate initialization. Now sync all attributes from ConfigurableMoE to backend.
         if self.backend is not None:
-            # Add a check to WAR the issue that the backend is none during torch.compile
-            assert not torch.compiler.is_compiling(), (
-                "Backend should not be none if not in torch.compile"
-            )
             self.backend.layer_idx = self.layer_idx
             self.backend.layer_idx_str = self.layer_idx_str
             self.backend.num_slots = self.num_slots
@@ -197,7 +196,7 @@ def __init__(
             self.backend.create_weights()

         # ========== Create Communication Strategy ==========
-        self._comm = self._create_comm_strategy_auto()
+        self.comm = self._create_comm_strategy_auto()

         # ========== Chunking Configuration ==========
         # moe_max_num_tokens is set in ModelConfig.__post_init__ if not specified
@@ -892,23 +891,13 @@ def _forward_multiple_chunks(

         return outputs

-    # ========== Backend Property with Validation ==========
-
-    @property
-    def backend(self) -> MoE:
-        """
-        Get the current MoE backend implementation
+    # ========== Backend Validation ==========

-        Note: Returns a FusedMoE instance (e.g., CutlassFusedMoE, CuteDslFusedMoE)
+    def validate_backend(self, backend: MoE):
         """
-        return self._backend
+        Validate MOE backend.

-    @backend.setter
-    def backend(self, backend: MoE):
-        """
-        Set MoE backend with validation
-
-        This setter validates that:
+        It validates that:
         1. Backend is not None
         2. If EPLB is enabled, backend must support routing separation
@@ -932,38 +921,6 @@ def backend(self, backend: MoE):
                 f"Either disable EPLB or use a backend that supports load balancer."
             )

-        # Set backend (validation passed)
-        self._backend = backend
-
-    @property
-    def comm(self) -> Optional[Communication]:
-        """Get the current communication strategy"""
-        return self._comm
-
-    @comm.setter
-    def comm(self, strategy: Optional[Communication]):
-        """
-        Set communication strategy with validation
-
-        This setter validates that the strategy is compatible with the configuration.
-
-        Args:
-            strategy: Communication instance to set (can be None for lazy creation)
-
-        Raises:
-            ValueError: If strategy is incompatible with current configuration
-
-        Note: Unlike backend, comm can be None (will be created lazily).
-        This allows for automatic strategy selection based on hardware.
-        """
-        # comm can be None (lazy creation)
-        if strategy is None:
-            self._comm = None
-            return
-
-        # Set strategy (validation passed)
-        self._comm = strategy
-
     # ========== Helper Methods ==========

     def _is_using_nvlink_two_sided(self) -> bool:
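
The fix replaces the `@property`/setter pairs with plain attributes plus an explicit `validate_backend` call. Two things make the old pattern fragile on an `nn.Module`: `nn.Module.__setattr__` special-cases `Module` (and `Parameter`) values and stores them straight into `self._modules`, silently bypassing any property setter and the validation inside it; and, as the removed `assert not torch.compiler.is_compiling()` workaround suggests, torch.compile's handling of properties on modules was where the backend showed up as None. A minimal sketch of the pitfall and the replacement pattern; the class and attribute names below are hypothetical stand-ins, not the actual `ConfigurableMoE` code:

```python
import torch
import torch.nn as nn


class WithProperty(nn.Module):
    """Old pattern (sketch): `backend` as a property with a setter."""

    @property
    def backend(self) -> nn.Module:
        # Every read goes through this getter, which torch.compile
        # has to special-case when tracing the module.
        return self._modules.get("backend")

    @backend.setter
    def backend(self, value: nn.Module):
        # nn.Module.__setattr__ stores Module values in self._modules
        # directly, so this setter never runs for module-valued
        # assignments and any validation here is silently skipped.
        raise AssertionError("never reached for nn.Module values")


class WithPlainAttr(nn.Module):
    """New pattern (sketch): validate explicitly, then assign plainly."""

    def __init__(self) -> None:
        super().__init__()
        backend = nn.Linear(2, 2)       # stand-in for create_moe_backend(...)
        self.validate_backend(backend)  # validation is explicit and always runs
        self.backend = backend          # registered as a regular submodule

    def validate_backend(self, backend: nn.Module) -> None:
        if backend is None:
            raise ValueError("backend must not be None")

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.backend(x)          # ordinary attribute lookup


m = WithProperty()
m.backend = nn.Linear(2, 2)      # no AssertionError: the setter was bypassed
print(type(m.backend).__name__)  # Linear

compiled = torch.compile(WithPlainAttr())
print(compiled(torch.ones(1, 2)).shape)  # torch.Size([1, 2])
```

With plain assignment, `backend` is registered as an ordinary submodule, validation runs exactly once and visibly in `__init__`, and eager and compiled execution take the same attribute-lookup path.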

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
@@ -393,7 +393,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5727475)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
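
For context: each entry in waives.txt pairs a pytest node ID with `SKIP` and the tracking bug URL, and deleting the line re-enables the test now that the bug is fixed. A minimal sketch of how such an entry could be parsed; the `parse_waive_line` helper is hypothetical, not part of the repository:

```python
from typing import NamedTuple, Optional


class Waive(NamedTuple):
    test_id: str  # pytest node ID, e.g. "file.py::TestCase::test_name[params]"
    bug_url: str  # tracking bug, e.g. "https://nvbugs/5727475"


def parse_waive_line(line: str) -> Optional[Waive]:
    """Parse one '<test_id> SKIP (<bug_url>)' entry (hypothetical helper)."""
    line = line.strip()
    if " SKIP " not in line:
        return None
    test_id, _, rest = line.partition(" SKIP ")
    return Waive(test_id.strip(), rest.strip().strip("()"))


entry = ("unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model "
         "SKIP (https://nvbugs/5670458)")
print(parse_waive_line(entry))
# Waive(test_id='unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model',
#       bug_url='https://nvbugs/5670458')
```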
