Skip to content

Commit d108404

Browse files
committed
[Refactor] refactor spec decode
Signed-off-by: wangxiyuan <[email protected]>
1 parent 61866b8 commit d108404

File tree

7 files changed

+588
-502
lines changed

7 files changed

+588
-502
lines changed

vllm_ascend/models/qwen2_vl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -349,4 +349,4 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
349349
quant_config=self._maybe_ignore_quant_config(
350350
vllm_config.quant_config),
351351
prefix=maybe_prefix(prefix, "visual"),
352-
)
352+
)

vllm_ascend/spec_decode/__init__.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
#
2+
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
3+
# Copyright 2023 The vLLM team.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
# This file is a part of the vllm-ascend project.
17+
# Adapted from vllm-project/vllm/vllm/worker/gpu_model_runner.py
18+
#
19+
from vllm_ascend.spec_decode.eagle_proposer import EagleProposer
20+
from vllm_ascend.spec_decode.mtp_proposer import MtpProposer
21+
from vllm_ascend.spec_decode.ngram_proposer import NgramProposer
22+
23+
24+
def get_spec_decode_method(method, vllm_config, device, runner):
    """Construct the proposer for the requested speculative decoding method.

    Args:
        method: Name of the speculative decoding method. The recognized
            names are "ngram", "eagle", "eagle3" and "deepseek_mtp".
        vllm_config: Forwarded unchanged to the proposer constructor.
        device: Forwarded unchanged to the proposer constructor.
        runner: Forwarded unchanged to the proposer constructor.

    Returns:
        An ``NgramProposer`` for "ngram", an ``EagleProposer`` for "eagle"
        or "eagle3", or an ``MtpProposer`` for "deepseek_mtp".

    Raises:
        ValueError: If ``method`` is not one of the recognized names.
    """
    if method == "ngram":
        return NgramProposer(vllm_config, device, runner)
    # "eagle" and "eagle3" are both served by the same proposer class.
    if method in ("eagle", "eagle3"):
        return EagleProposer(vllm_config, device, runner)
    if method == "deepseek_mtp":
        return MtpProposer(vllm_config, device, runner)
    raise ValueError("Unknown speculative decoding method: "
                     f"{method}")

0 commit comments

Comments
 (0)