Rebase main

wxsIcey · wangxiyuan · commit b0b1efa04b30 · 2025-09-29T21:48:56.000+08:00
Signed-off-by: wxsIcey <1790571317@qq.com>
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
@@ -2511,7 +2511,7 @@ def profile_run(self) -> None:
             # MC2 will consume additional NPU memory.
             # Therefore, we need to run the MC2 path once here to complete its initialization,
             # allowing vLLM to correctly estimate the maximum memory required.
-            if not self.ascend_config.torchair_graph_config.enabled and self._select_moe_comm_method(
+            if self._select_moe_comm_method(
                     self.mc2_tokens_capacity,
                     with_prefill=True) == MoECommType.MC2:
                 self._dummy_run(self.mc2_tokens_capacity, with_prefill=True)