diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 56c3bf3c55..c0fe655836 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -27,6 +27,7 @@ from vllm.logger import logger from vllm.platforms import Platform, PlatformEnum +import vllm_ascend.envs as envs_ascend from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, check_torchair_cache_exist, @@ -169,6 +170,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: check_ascend_config(vllm_config, enforce_eager) + if vllm_config.speculative_config and envs_ascend.VLLM_ASCEND_ENABLE_DBO: + raise ValueError( + "DBO and mtp can't work at the same time. Please `export VLLM_ASCEND_ENABLE_DBO=0`" + ) if enforce_eager or compilation_config.level == CompilationLevel.NO_COMPILATION: logger.info("Compilation disabled, using eager mode by default") compilation_config.level = CompilationLevel.NO_COMPILATION @@ -194,7 +199,7 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: logger.warning( "Ray distributed executor backend is not compatible with ACL Graph mode " "right now. Setting level to NO_COMPILATION") - compilation_config.level = CompilationLevel.NO_COMPILATION + compilation_config.level = CompilationLevel.NO_COMPILATION else: logger.info( "PIECEWISE compilation enabled on NPU. 
use_inductor not supported - " diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py index 4930d80a81..ad62c62f0e 100644 --- a/vllm_ascend/worker/model_runner_v1.py +++ b/vllm_ascend/worker/model_runner_v1.py @@ -221,9 +221,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device): self.spec_token_num = 0 self.decode_token_per_req = 1 if self.speculative_config: - if envs_ascend.VLLM_ASCEND_ENABLE_DBO: - raise NotImplementedError( - "DBO and mtp can't work at the same currently") self.use_spec_decode = True self.spec_token_num = self.speculative_config.num_speculative_tokens assert self.spec_token_num > 0