[None][fix] Fix is_post_quant_all2all_supported for MNNVL (NVIDIA#8355)

yuantailing · dominicshanshan · commit e781c5490bf0 · 2025-11-02T19:10:37.000-08:00
Signed-off-by: Tailing Yuan <yuantailing@gmail.com>
diff --git a/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py b/tensorrt_llm/_torch/modules/fused_moe/fused_moe_wide_ep.py
@@ -445,7 +445,7 @@ def is_post_quant_all2all_supported(self):
         if not self.use_postquant_alltoall:
             return False
         if self.alltoall_method_type == AlltoallMethodType.MNNVL:
-            return False
+            return True
         elif self.alltoall_method_type == AlltoallMethodType.DeepEP:
             return self.has_nvfp4
         elif self.alltoall_method_type == AlltoallMethodType.DeepEPLowLatency: