
Commit ef1d4a4

[https://nvbugs/5727475][fix] Avoid using a property with a setter in nn.Module (#10212)
Signed-off-by: Jin Li <[email protected]>
1 parent d944430 · commit ef1d4a4

File tree: 2 files changed (+9, -53 lines)


tensorrt_llm/_torch/modules/fused_moe/configurable_moe.py

Lines changed: 9 additions & 52 deletions
@@ -147,7 +147,7 @@ def __init__(
         model_config.skip_create_weights_in_init = True
         model_config._frozen = True

-        self.backend = create_moe_backend(
+        backend = create_moe_backend(
             moe_cls=moe_cls,
             routing_method=routing_method,
             num_experts=self.num_experts,
@@ -168,15 +168,14 @@ def __init__(
             without_comm=True,
         )

+        self.validate_backend(backend)
+        self.backend = backend
+
         # Sync critical attributes from ConfigurableMoE to backend
         # ConfigurableMoE's super().__init__() was called with real layer_idx and initialized load balancer.
         # Backend was created with init_load_balancer=False and without_comm=True to avoid
         # duplicate initialization. Now sync all attributes from ConfigurableMoE to backend.
         if self.backend is not None:
-            # Add a check to WAR the issue that the backend is none during torch.compile
-            assert not torch.compiler.is_compiling(), (
-                "Backend should not be none if not in torch.compile"
-            )
             self.backend.layer_idx = self.layer_idx
             self.backend.layer_idx_str = self.layer_idx_str
             self.backend.num_slots = self.num_slots
@@ -197,7 +196,7 @@ def __init__(
             self.backend.create_weights()

         # ========== Create Communication Strategy ==========
-        self._comm = self._create_comm_strategy_auto()
+        self.comm = self._create_comm_strategy_auto()

         # ========== Chunking Configuration ==========
         # moe_max_num_tokens is set in ModelConfig.__post_init__ if not specified
@@ -892,23 +891,13 @@ def _forward_multiple_chunks(

         return outputs

-    # ========== Backend Property with Validation ==========
-
-    @property
-    def backend(self) -> MoE:
-        """
-        Get the current MoE backend implementation
+    # ========== Backend Validation ==========

-        Note: Returns a FusedMoE instance (e.g., CutlassFusedMoE, CuteDslFusedMoE)
+    def validate_backend(self, backend: MoE):
         """
-        return self._backend
+        Validate MOE backend.

-    @backend.setter
-    def backend(self, backend: MoE):
-        """
-        Set MoE backend with validation
-
-        This setter validates that:
+        It validates that:
         1. Backend is not None
         2. If EPLB is enabled, backend must support routing separation
@@ -932,38 +921,6 @@ def backend(self, backend: MoE):
                 f"Either disable EPLB or use a backend that supports load balancer."
             )

-        # Set backend (validation passed)
-        self._backend = backend
-
-    @property
-    def comm(self) -> Optional[Communication]:
-        """Get the current communication strategy"""
-        return self._comm
-
-    @comm.setter
-    def comm(self, strategy: Optional[Communication]):
-        """
-        Set communication strategy with validation
-
-        This setter validates that the strategy is compatible with the configuration.
-
-        Args:
-            strategy: Communication instance to set (can be None for lazy creation)
-
-        Raises:
-            ValueError: If strategy is incompatible with current configuration
-
-        Note: Unlike backend, comm can be None (will be created lazily).
-        This allows for automatic strategy selection based on hardware.
-        """
-        # comm can be None (lazy creation)
-        if strategy is None:
-            self._comm = None
-            return
-
-        # Set strategy (validation passed)
-        self._comm = strategy
-
     # ========== Helper Methods ==========

     def _is_using_nvlink_two_sided(self) -> bool:
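
The fix replaces the `@property`/setter pairs with plain attributes plus an explicit `validate_backend` call. Two things make the old pattern fragile on an `nn.Module`: `nn.Module.__setattr__` special-cases `Module` (and `Parameter`) values and stores them straight into `self._modules`, silently bypassing any property setter and the validation inside it; and, as the removed `assert not torch.compiler.is_compiling()` workaround suggests, torch.compile's handling of properties on modules was where the backend showed up as None. A minimal sketch of the pitfall and the replacement pattern; the class and attribute names below are hypothetical stand-ins, not the actual `ConfigurableMoE` code:

```python
import torch
import torch.nn as nn


class WithProperty(nn.Module):
    """Old pattern (sketch): `backend` as a property with a setter."""

    @property
    def backend(self) -> nn.Module:
        # Every read goes through this getter, which torch.compile
        # has to special-case when tracing the module.
        return self._modules.get("backend")

    @backend.setter
    def backend(self, value: nn.Module):
        # nn.Module.__setattr__ stores Module values in self._modules
        # directly, so this setter never runs for module-valued
        # assignments and any validation here is silently skipped.
        raise AssertionError("never reached for nn.Module values")


class WithPlainAttr(nn.Module):
    """New pattern (sketch): validate explicitly, then assign plainly."""

    def __init__(self) -> None:
        super().__init__()
        backend = nn.Linear(2, 2)       # stand-in for create_moe_backend(...)
        self.validate_backend(backend)  # validation is explicit and always runs
        self.backend = backend          # registered as a regular submodule

    def validate_backend(self, backend: nn.Module) -> None:
        if backend is None:
            raise ValueError("backend must not be None")

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.backend(x)          # ordinary attribute lookup


m = WithProperty()
m.backend = nn.Linear(2, 2)      # no AssertionError: the setter was bypassed
print(type(m.backend).__name__)  # Linear

compiled = torch.compile(WithPlainAttr())
print(compiled(torch.ones(1, 2)).shape)  # torch.Size([1, 2])
```

With plain assignment, `backend` is registered as an ordinary submodule, validation runs exactly once and visibly in `__init__`, and eager and compiled execution take the same attribute-lookup path.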

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
@@ -393,7 +393,6 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUT
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_2gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_eagle3_4gpus[cutlass-two_model-overlap_scheduler] SKIP (https://nvbugs/5702826)
 unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model SKIP (https://nvbugs/5670458)
-accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[enable_configurable_moe-moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True] SKIP (https://nvbugs/5727475)
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=0-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False] SKIP (https://nvbugs/5740377)
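
For context: each entry in waives.txt pairs a pytest node ID with `SKIP` and the tracking bug URL, and deleting the line re-enables the test now that the bug is fixed. A minimal sketch of how such an entry could be parsed; the `parse_waive_line` helper is hypothetical, not part of the repository:

```python
from typing import NamedTuple, Optional


class Waive(NamedTuple):
    test_id: str  # pytest node ID, e.g. "file.py::TestCase::test_name[params]"
    bug_url: str  # tracking bug, e.g. "https://nvbugs/5727475"


def parse_waive_line(line: str) -> Optional[Waive]:
    """Parse one '<test_id> SKIP (<bug_url>)' entry (hypothetical helper)."""
    line = line.strip()
    if " SKIP " not in line:
        return None
    test_id, _, rest = line.partition(" SKIP ")
    return Waive(test_id.strip(), rest.strip().strip("()"))


entry = ("unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model "
         "SKIP (https://nvbugs/5670458)")
print(parse_waive_line(entry))
# Waive(test_id='unittest/llmapi/test_llm_pytorch.py::test_llm_reward_model',
#       bug_url='https://nvbugs/5670458')
```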
