Skip to content

Commit a92fe2a

Browse files
linfeng-yuan authored and wangxiyuan committed
fix torchair mtp
Signed-off-by: linfeng-yuan <[email protected]>
1 parent 5e2335b commit a92fe2a

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

vllm_ascend/spec_decode/mtp_proposer.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import torch
44
import torch.nn as nn
55
import torchair
6-
import vllm.envs as envs_vllm
76
from torchair import patch_for_hcom
87
from vllm.attention.layer import Attention
98
from vllm.config import (VllmConfig, get_layers_from_vllm_config,
@@ -607,7 +606,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
607606
self.torchair_compiled_model = torch.compile(
608607
self.model,
609608
dynamic=not get_ascend_config().use_sfa,
610-
fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
609+
fullgraph=True,
611610
backend=npu_backend)
612611
return self.torchair_compiled_model
613612
else:
@@ -630,7 +629,7 @@ def _get_torchair_lazy_compiled_model(self, batch_size: int):
630629
batch_size] = torchair.inference.cache_compile(
631630
self.model.__dict__[forward_proxy_name],
632631
dynamic=not get_ascend_config().use_sfa,
633-
fullgraph=envs_vllm.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE,
632+
fullgraph=True,
634633
cache_dir=TORCHAIR_CACHE_DIR,
635634
config=config,
636635
ge_cache=False)

0 commit comments

Comments
 (0)