Skip to content

Commit a92fe2a

Browse files
linfeng-yuan authored and wangxiyuan committed
fix torchair mtp
Signed-off-by: linfeng-yuan <[email protected]>
1 parent 5e2335b commit a92fe2a

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import torch
44
import torch.nn as nn
55
import torchair
6-
import vllm.envs as envs_vllm
76
from torchair import patch_for_hcom
87
from vllm.attention.layer import Attention
98
from vllm.config import (VllmConfig, get_layers_from_vllm_config,
@@ -607,7 +606,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
607606
self.torchair_compiled_model = torch.compile(
608607
self.model,
609608
dynamic=not get_ascend_config().use_sfa,
610-
fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
609+
fullgraph=True,
611610
backend=npu_backend)
612611
return self.torchair_compiled_model
613612
else:
@@ -630,7 +629,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
630629
batch_size] = torchair.inference.cache_compile(
631630
self.model.__dict__[forward_proxy_name],
632631
dynamic=not get_ascend_config().use_sfa,
633-
fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
632+
fullgraph=True,
634633
cache_dir=TORCHAIR_CACHE_DIR,
635634
config=config,
636635
ge_cache=False)

0 commit comments

Comments
 (0)