We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c4b5a53 commit b0b1efa (Copy full SHA for b0b1efa)
vllm_ascend/worker/model_runner_v1.py
@@ -2511,7 +2511,7 @@ def profile_run(self) -> None:
2511
# MC2 will consume additional NPU memory.
2512
# Therefore, we need to run the MC2 path once here to complete its initialization,
2513
# allowing vLLM to correctly estimate the maximum memory required.
2514
- if not self.ascend_config.torchair_graph_config.enabled and self._select_moe_comm_method(
+ if self._select_moe_comm_method(
2515
self.mc2_tokens_capacity,
2516
with_prefill=True) == MoECommType.MC2:
2517
self._dummy_run(self.mc2_tokens_capacity, with_prefill=True)
0 commit comments