
Commit 007f5b6

linfeng-yuan authored and 赵江江 committed
fix: dbo fit on 0.9.1
Signed-off-by: 赵江江 <[email protected]>
1 parent 19c7b3e commit 007f5b6

File tree: 6 files changed (+31, -5 lines)


tests/long_term/spec_decode_v1/test_v1_mtp_correctness.py

Lines changed: 6 additions & 1 deletion
@@ -78,7 +78,12 @@ def test_mtp_correctness(
         },
         max_model_len=256,
         gpu_memory_utilization=0.8,
-        enforce_eager=True)
+        enforce_eager=True,
+        additional_config={
+            "ascend_scheduler_config": {
+                "enabled": True
+            },
+        })
     spec_outputs = spec_llm.chat(test_prompts, sampling_config)
     matches = 0
     misses = 0
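
For readers of this change, a minimal sketch of what the updated test configuration amounts to from the user side: with this commit, DeepSeek MTP speculative decoding on vllm-ascend is expected to run with the Ascend scheduler, enabled through additional_config. The checkpoint path, prompt, and sampling values below are placeholders rather than code from this repository, and the sketch assumes the vllm.LLM constructor accepts speculative_config as a dict (how the test sets up MTP lies outside this hunk).

from vllm import LLM, SamplingParams

# Placeholder path; a DeepSeek checkpoint with an MTP draft head is assumed here.
spec_llm = LLM(
    model="/path/to/deepseek-with-mtp",
    speculative_config={
        "method": "deepseek_mtp",        # the method gated in check_ascend_config below
        "num_speculative_tokens": 1,
    },
    max_model_len=256,
    gpu_memory_utilization=0.8,
    enforce_eager=True,
    additional_config={
        "ascend_scheduler_config": {
            "enabled": True,             # required for deepseek_mtp after this change
        },
    },
)

outputs = spec_llm.chat(
    [[{"role": "user", "content": "Briefly explain speculative decoding."}]],
    SamplingParams(temperature=0.0, max_tokens=64),
)
print(outputs[0].outputs[0].text)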

tests/singlecard/core/test_ascend_scheduler_e2e.py

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ def model() -> LLM:
         MODEL,
         enforce_eager=True,
         enable_prefix_caching=True,
+        max_model_len=200,
         max_num_batched_tokens=200,
         max_num_seqs=3,
         additional_config={"ascend_scheduler_config": {

vllm_ascend/ascend_config.py

Lines changed: 7 additions & 0 deletions
@@ -203,6 +203,13 @@ def check_ascend_config(vllm_config, enforce_eager):
                 "Ascend scheduler is only supported for V1 Engine.")
     # for v1 engine
     else:
+        # TODO(yexiong): remove this verification after mtp model supports original vllm scheduler
+        if (not ascend_config.ascend_scheduler_config.enabled
+                and vllm_config.speculative_config
+                and vllm_config.speculative_config.method == 'deepseek_mtp'):
+            raise NotImplementedError(
+                "Currently deepseek MTP model is only supported for ascend scheduler."
+            )
         # for eager mode
         if enforce_eager:
             # torchair_graph cannot be enabled with eager mode.

vllm_ascend/core/schedule_config.py

Lines changed: 10 additions & 0 deletions
@@ -55,6 +55,16 @@ def __post_init__(self) -> None:
         self.max_num_encoder_input_tokens = self.max_num_batched_tokens
         self.encoder_cache_size = self.max_num_batched_tokens
         self.chunked_prefill_enabled = self.enable_chunked_prefill
+        if (self.max_num_batched_tokens < self.max_model_len
+                and not self.chunked_prefill_enabled):
+            raise ValueError(
+                "Ascend scheduler is enabled without chunked prefill feature. "
+                f"Argument max_num_batched_tokens ({self.max_num_batched_tokens}) is "
+                f"smaller than max_model_len ({self.max_model_len}). "
+                "This effectively limits the maximum sequence length to "
+                "max_num_batched_tokens and makes vLLM reject longer "
+                "sequences. Please increase max_num_batched_tokens or "
+                "decrease max_model_len.")
         if self.policy != "fcfs":
             raise NotImplementedError(
                 f"currently AscendScheduler only supports fcfs policy, got {self.policy}"

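This validation also explains the max_model_len=200 added to tests/singlecard/core/test_ascend_scheduler_e2e.py above: when the Ascend scheduler runs without chunked prefill, a whole prompt has to fit into a single batch, so max_num_batched_tokens must be at least max_model_len. A rough standalone sketch of the rule with placeholder numbers (not code from this commit):

# Sketch of the constraint added in __post_init__ above: with chunked prefill
# disabled, one batch must be able to hold a full-length sequence.
def check_batched_tokens(max_num_batched_tokens: int, max_model_len: int,
                         chunked_prefill_enabled: bool) -> None:
    if max_num_batched_tokens < max_model_len and not chunked_prefill_enabled:
        raise ValueError(
            f"max_num_batched_tokens ({max_num_batched_tokens}) is smaller than "
            f"max_model_len ({max_model_len}); increase max_num_batched_tokens "
            "or decrease max_model_len.")

check_batched_tokens(200, 200, chunked_prefill_enabled=False)   # passes: the updated test setup
check_batched_tokens(200, 2048, chunked_prefill_enabled=False)  # raises ValueError
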
vllm_ascend/platform.py

Lines changed: 7 additions & 1 deletion
@@ -27,6 +27,7 @@
 from vllm.logger import logger
 from vllm.platforms import Platform, PlatformEnum
 
+import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
 from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD,
                                check_torchair_cache_exist,
@@ -147,14 +148,19 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
 
         check_ascend_config(vllm_config, enforce_eager)
 
+        if vllm_config.speculative_config and envs_ascend.VLLM_ASCEND_ENABLE_DBO:
+            raise ValueError(
+                "DBO and mtp can't work at the same time. Please `export VLLM_ASCEND_ENABLE_DBO=0`"
+            )
+
         if enforce_eager or compilation_config.level == CompilationLevel.NO_COMPILATION:
             logger.info("Compilation disabled, using eager mode by default")
             compilation_config.level = CompilationLevel.NO_COMPILATION
         elif compilation_config.level != CompilationLevel.PIECEWISE:
             logger.warning(
                 "NPU does not support %s compilation level. Setting level to NO_COMPILATION",
                 compilation_config.level)
-            compilation_config.level = CompilationLevel.NO_COMPILATION
+            compilation_config.level = CompilationLevel.NO_COMPILATION
         elif ascend_config.torchair_graph_config.enabled:
             logger.info(
                 "Torchair compilation enabled on NPU. Setting level to NO_COMPILATION"

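With the check raised here, a speculative-decoding (MTP) config combined with DBO (dual batch overlap) now fails fast during platform config validation instead of inside the worker (see the removal in the last file below). A hedged sketch of how a user would disable DBO before building an MTP engine; the checkpoint path is a placeholder, and it assumes the environment variable must be set before vllm_ascend reads its envs, equivalent to `export VLLM_ASCEND_ENABLE_DBO=0` in the shell:

import os

# Must be set before the engine is created and vllm_ascend.envs is consulted.
os.environ["VLLM_ASCEND_ENABLE_DBO"] = "0"

from vllm import LLM

llm = LLM(
    model="/path/to/deepseek-with-mtp",   # placeholder checkpoint path
    speculative_config={
        "method": "deepseek_mtp",
        "num_speculative_tokens": 1,
    },
    additional_config={"ascend_scheduler_config": {"enabled": True}},
)
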
vllm_ascend/worker/model_runner_v1.py

Lines changed: 0 additions & 3 deletions
@@ -219,9 +219,6 @@ def __init__(self, vllm_config: VllmConfig, device: torch.device):
         self.spec_token_num = 0
         self.decode_token_per_req = 1
         if self.speculative_config:
-            if envs_ascend.VLLM_ASCEND_ENABLE_DBO:
-                raise NotImplementedError(
-                    "DBO and mtp can't work at the same currently")
             self.use_spec_decode = True
             self.spec_token_num = self.speculative_config.num_speculative_tokens
             assert self.spec_token_num > 0
