Commit 1c5b302

[Misc] Clean up useless patch (#3320)
### What this PR does / why we need it?
1. Clean up v0.10.2 support in the UT and e2e tests.
2. Remove the v0.11.0 periodic job; we're on v0.11.0 now.
3. Remove the useless patch for DeepSeek V3.2; it has already been done in vLLM.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: wangxiyuan <[email protected]>
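For readers skimming the diffs below, the recurring pattern being deleted is the dual-version import gate. The sketch below is reassembled from the removed lines in this commit (not new code); with only vLLM v0.11.0 supported, every such gate collapses to its `else` branch:

    # Pattern removed throughout this PR: each touched test gated its imports on
    # the installed vLLM version so one file could run against v0.10.2 and v0.11.0.
    from vllm_ascend.utils import vllm_version_is

    if vllm_version_is("0.10.2"):
        # old API location (vLLM v0.10.2)
        from vllm.sampling_params import GuidedDecodingParams, SamplingParams
    else:
        # new API location (vLLM v0.11.0) -- the only branch kept after this PR
        from vllm.sampling_params import SamplingParams, StructuredOutputsParams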
1 parent a43e2f6 commit 1c5b302

File tree

10 files changed (+29, -412 lines)

.github/workflows/vllm_ascend_test_full_vllm_0.11.0.yaml

Lines changed: 0 additions & 51 deletions
This file was deleted.

tests/e2e/conftest.py

Lines changed: 1 addition & 8 deletions
@@ -32,14 +32,7 @@
                                      BatchEncoding, BatchFeature)
 from transformers.models.auto.auto_factory import _BaseAutoModelClass
 from vllm import LLM, SamplingParams
-
-from vllm_ascend.utils import vllm_version_is
-
-if vllm_version_is("0.10.2"):
-    from vllm.config import TaskOption, _get_and_verify_dtype
-else:
-    from vllm.config.model import TaskOption, _get_and_verify_dtype
-
+from vllm.config.model import TaskOption, _get_and_verify_dtype
 from vllm.inputs import TextPrompt
 from vllm.outputs import RequestOutput
 from vllm.transformers_utils.utils import maybe_model_redirect

tests/e2e/model_utils.py

Lines changed: 1 addition & 6 deletions
@@ -19,12 +19,7 @@
 
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 
-from vllm_ascend.utils import vllm_version_is
-
-if vllm_version_is("0.10.2"):
-    from vllm.sequence import PromptLogprobs, SampleLogprobs
-else:
-    from vllm.logprobs import PromptLogprobs, SampleLogprobs
+from vllm.logprobs import PromptLogprobs, SampleLogprobs
 
 TokensText = Tuple[List[int], str]

tests/e2e/singlecard/test_guided_decoding.py

Lines changed: 21 additions & 49 deletions
@@ -22,15 +22,8 @@
 import jsonschema
 import pytest
 import regex as re
-
-from vllm_ascend.utils import vllm_version_is
-
-if vllm_version_is("0.10.2"):
-    from vllm.sampling_params import GuidedDecodingParams, SamplingParams
-else:
-    from vllm.sampling_params import SamplingParams, StructuredOutputsParams
-
 from vllm.outputs import RequestOutput
+from vllm.sampling_params import SamplingParams, StructuredOutputsParams
 
 from tests.e2e.conftest import VllmRunner
 
@@ -91,27 +84,16 @@ def sample_json_schema():
 def test_guided_json_completion(guided_decoding_backend: str,
                                 sample_json_schema):
     runner_kwargs: Dict[str, Any] = {}
-    if vllm_version_is("0.10.2"):
-        sampling_params = SamplingParams(
-            temperature=1.0,
-            max_tokens=500,
-            guided_decoding=GuidedDecodingParams(json=sample_json_schema))
-        runner_kwargs = {
-            "seed": 0,
-            "guided_decoding_backend": guided_decoding_backend,
-        }
-    else:
-        sampling_params = SamplingParams(
-            temperature=1.0,
-            max_tokens=500,
-            structured_outputs=StructuredOutputsParams(
-                json=sample_json_schema))
-        runner_kwargs = {
-            "seed": 0,
-            "structured_outputs_config": {
-                "backend": guided_decoding_backend
-            },
-        }
+    sampling_params = SamplingParams(
+        temperature=1.0,
+        max_tokens=500,
+        structured_outputs=StructuredOutputsParams(json=sample_json_schema))
+    runner_kwargs = {
+        "seed": 0,
+        "structured_outputs_config": {
+            "backend": guided_decoding_backend
+        },
+    }
     with VllmRunner(MODEL_NAME, **runner_kwargs) as vllm_model:
         prompts = [
             f"Give an example JSON for an employee profile "
@@ -141,26 +123,16 @@ def test_guided_regex(guided_decoding_backend: str, sample_regex):
     if guided_decoding_backend == "outlines":
         pytest.skip("Outlines doesn't support regex-based guided decoding.")
     runner_kwargs: Dict[str, Any] = {}
-    if vllm_version_is("0.10.2"):
-        sampling_params = SamplingParams(
-            temperature=0.8,
-            top_p=0.95,
-            guided_decoding=GuidedDecodingParams(regex=sample_regex))
-        runner_kwargs = {
-            "seed": 0,
-            "guided_decoding_backend": guided_decoding_backend,
-        }
-    else:
-        sampling_params = SamplingParams(
-            temperature=0.8,
-            top_p=0.95,
-            structured_outputs=StructuredOutputsParams(regex=sample_regex))
-        runner_kwargs = {
-            "seed": 0,
-            "structured_outputs_config": {
-                "backend": guided_decoding_backend
-            },
-        }
+    sampling_params = SamplingParams(
+        temperature=0.8,
+        top_p=0.95,
+        structured_outputs=StructuredOutputsParams(regex=sample_regex))
+    runner_kwargs = {
+        "seed": 0,
+        "structured_outputs_config": {
+            "backend": guided_decoding_backend
+        },
+    }
 
     with VllmRunner(MODEL_NAME, **runner_kwargs) as vllm_model:
         prompts = [
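The updated test exercises vLLM v0.11.0's structured-outputs API directly. As a rough usage sketch outside the test harness (the model name and backend choice here are illustrative assumptions, not taken from the test), the same parameters would be passed like this:

    # Minimal sketch: guided_decoding=GuidedDecodingParams(...) becomes
    # structured_outputs=StructuredOutputsParams(...), and the backend is chosen
    # through structured_outputs_config rather than guided_decoding_backend.
    from vllm import LLM, SamplingParams
    from vllm.sampling_params import StructuredOutputsParams

    schema = {"type": "object", "properties": {"name": {"type": "string"}}}

    sampling_params = SamplingParams(
        temperature=1.0,
        max_tokens=500,
        structured_outputs=StructuredOutputsParams(json=schema))

    llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct",  # illustrative model choice
              structured_outputs_config={"backend": "xgrammar"})
    outputs = llm.generate("Give an example JSON for an employee profile.",
                           sampling_params)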

tests/ut/ops/test_fused_moe_prepare_and_finalize.py

Lines changed: 2 additions & 7 deletions
@@ -8,7 +8,6 @@
     FusedMoEPrepareAndFinalizeWithAll2All,
     FusedMoEPrepareAndFinalizeWithAllGather, FusedMoEPrepareAndFinalizeWithMC2,
     FusedMoEPrepareAndFinalizeWithNaiveMulticast)
-from vllm_ascend.utils import vllm_version_is
 
 
 class TestFusedMoEPrepareAndFinalize(unittest.TestCase):
@@ -231,12 +230,8 @@ def test_naive_multicast_prepare_finalize(self, mock_get_forward_context,
                                               mock_get_dp_group):
         # Mock forward context with DP metadata
         mock_context = MagicMock()
-        if vllm_version_is("0.10.2"):
-            mock_context.dp_metadata.cu_tokens_across_dp_cpu = torch.tensor(
-                [2, 5, 7])
-        else:
-            mock_context.dp_metadata.cu_tokens_across_sp.return_value = torch.tensor(
-                [2, 5, 7])
+        mock_context.dp_metadata.cu_tokens_across_sp.return_value = torch.tensor(
+            [2, 5, 7])
         mock_get_forward_context.return_value = mock_context
 
         # Setup DP group mock
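Note the shape of the surviving mock: the old cu_tokens_across_dp_cpu was stubbed as a plain attribute, while cu_tokens_across_sp is invoked as a method, so the tensor has to go on .return_value. A minimal standalone illustration of that MagicMock behavior (the call argument is only an example):

    from unittest.mock import MagicMock

    import torch

    mock_context = MagicMock()
    mock_context.dp_metadata.cu_tokens_across_sp.return_value = torch.tensor([2, 5, 7])

    # Code under test calls the method (e.g. with an sp_size argument) and gets
    # the stubbed tensor back regardless of the argument value.
    assert torch.equal(mock_context.dp_metadata.cu_tokens_across_sp(1),
                       torch.tensor([2, 5, 7]))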

tests/ut/ops/test_fused_ops.py

Lines changed: 2 additions & 6 deletions
@@ -28,7 +28,7 @@
     AscendUnquantizedFusedMoEMethod)
 from vllm_ascend.ops.moe.experts_selector import select_experts
 from vllm_ascend.ops.moe.moe_mlp import cumsum_group_list, unified_apply_mlp
-from vllm_ascend.utils import AscendSocVersion, adapt_patch, vllm_version_is
+from vllm_ascend.utils import AscendSocVersion, adapt_patch
 
 adapt_patch(True)
 
@@ -92,11 +92,7 @@ def mock_finalize(hidden_states, **kwargs):
         return hidden_states
 
     mock_moe_comm_method.finalize.side_effect = mock_finalize
-
-    if vllm_version_is("0.10.2"):
-        dp_metadata = MagicMock(cu_tokens_across_dp_cpu=[5, 10])
-    else:
-        dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
+    dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
     mock_forward_context_obj = MagicMock(moe_comm_method=mock_moe_comm_method,
                                          moe_comm_type=MoECommType.MC2,
                                          max_tokens_across_dp=10,

tests/ut/torchair/ops/test_torchair_fused_moe.py

Lines changed: 2 additions & 5 deletions
@@ -27,7 +27,7 @@
 from vllm_ascend.torchair.ops.torchair_fused_moe import (
     TorchairAscendFusedMoE, TorchairAscendUnquantizedFusedMoEMethod)
 from vllm_ascend.utils import adapt_patch  # noqa E402
-from vllm_ascend.utils import AscendSocVersion, vllm_version_is
+from vllm_ascend.utils import AscendSocVersion
 
 adapt_patch(True)
 
@@ -54,10 +54,7 @@ def mock_dp_and_tp_group(mocker):
 @pytest.fixture
 def mock_dist_env(mocker: MockerFixture):
     # init dist env patch
-    if vllm_version_is("0.10.2"):
-        dp_metadata = MagicMock(cu_tokens_across_dp_cpu=[5, 10])
-    else:
-        dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
+    dp_metadata = MagicMock(num_tokens_across_dp_cpu=[5, 5])
 
     with patch('torch.distributed.get_rank', return_value=0), \
         patch('torch.distributed.get_world_size', return_value=4), \
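The mocked values change meaning along with the field name: the old attribute held cumulative token counts across DP ranks, while the new one holds per-rank counts. A quick sanity check of the equivalence for the values used in these tests (assuming that reading of the two fields):

    import torch

    num_tokens_across_dp_cpu = torch.tensor([5, 5])        # per-rank counts (new field)
    cu_tokens = torch.cumsum(num_tokens_across_dp_cpu, 0)  # cumulative form (old field)
    assert cu_tokens.tolist() == [5, 10]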

vllm_ascend/patch/platform/patch_common/__init__.py

Lines changed: 0 additions & 1 deletion
@@ -19,6 +19,5 @@
 import vllm_ascend.patch.platform.patch_common.patch_distributed  # noqa
 import vllm_ascend.patch.platform.patch_common.patch_mamba_config  # noqa
 import vllm_ascend.patch.platform.patch_common.patch_multimodal_merge  # noqa
-import vllm_ascend.patch.platform.patch_common.patch_transformers_utils  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_attention_selector  # noqa
 import vllm_ascend.patch.worker.patch_common.patch_attentionspec  # noqa

vllm_ascend/patch/platform/patch_common/patch_config.py

Lines changed: 0 additions & 79 deletions
@@ -1,87 +1,10 @@
 import ast
 
 import vllm.envs as envs
-from transformers import PretrainedConfig
-from vllm.config import ModelConfig
 from vllm.config.speculative import SpeculativeConfig
 from vllm.logger import logger
 
 
-# mypy: ignore-errors
-@property
-def is_deepseek_mla(self: ModelConfig):
-    if not hasattr(self.hf_text_config, "model_type"):
-        return False
-    elif self.hf_text_config.model_type in \
-            ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp',
-             'kimi_k2', 'longcat_flash', 'deepseek_v32'):
-        return self.hf_text_config.kv_lora_rank is not None
-    elif self.hf_text_config.model_type == 'eagle':
-        # if the model is an EAGLE module, check for the
-        # underlying architecture
-        return self.hf_text_config.model.model_type in \
-            ('deepseek_v2', 'deepseek_v3', 'deepseek_v32') \
-            and self.hf_text_config.kv_lora_rank is not None
-    return False
-
-
-@staticmethod
-def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig:
-    if hf_config.model_type in ("deepseek_v3", "deepseek_v32"):
-        hf_config.model_type = "deepseek_mtp"
-    if hf_config.model_type == "deepseek_mtp":
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
-        hf_config.update({
-            "n_predict": n_predict,
-            "architectures": ["DeepSeekMTPModel"]
-        })
-
-    if hf_config.architectures[0] == "MiMoForCausalLM":
-        hf_config.model_type = "mimo_mtp"
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
-        hf_config.update({
-            "num_hidden_layers": 0,
-            "n_predict": n_predict,
-            "architectures": ["MiMoMTPModel"]
-        })
-
-    if hf_config.architectures[0] == "Glm4MoeForCausalLM":
-        hf_config.model_type = "glm4_moe_mtp"
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
-        hf_config.update({
-            "num_hidden_layers": 0,
-            "n_predict": n_predict,
-            "architectures": ["Glm4MoeMTPModel"]
-        })
-
-    if hf_config.model_type == "ernie4_5_moe":
-        hf_config.model_type = "ernie_mtp"
-    if hf_config.model_type == "ernie_mtp":
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
-        hf_config.update({
-            "n_predict": n_predict,
-            "architectures": ["ErnieMTPModel"]
-        })
-
-    if hf_config.model_type == "qwen3_next":
-        hf_config.model_type = "qwen3_next_mtp"
-    if hf_config.model_type == "qwen3_next_mtp":
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", None)
-        hf_config.update({
-            "n_predict": n_predict,
-            "architectures": ["Qwen3NextMTP"]
-        })
-    if hf_config.model_type == "longcat_flash":
-        hf_config.model_type = "longcat_flash_mtp"
-        n_predict = getattr(hf_config, "num_nextn_predict_layers", 1)
-        hf_config.update({
-            "n_predict": n_predict,
-            "architectures": ["LongCatFlashMTPModel"]
-        })
-
-    return hf_config
-
-
 def __post_init__(self):
 
     # Note: "method" is a new parameter that helps to extend the
@@ -308,6 +231,4 @@ def __post_init__(self):
                     self.draft_tensor_parallel_size))
 
 
-ModelConfig.is_deepseek_mla = is_deepseek_mla
 SpeculativeConfig.__post_init__ = __post_init__
-SpeculativeConfig.hf_config_override = hf_config_override
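For context, patch_config.py works by plain monkey-patching: module-level functions are assigned over vLLM attributes at import time. After this commit only the SpeculativeConfig.__post_init__ override remains, since is_deepseek_mla and hf_config_override are handled in vLLM itself. The schematic below shows the mechanism only; the body is elided and is not the real implementation:

    # Schematic of the patching mechanism used by patch_config.py (body elided).
    from vllm.config.speculative import SpeculativeConfig


    def __post_init__(self):
        # Ascend-specific speculative-config setup goes here; see patch_config.py
        # for the actual replacement body.
        ...


    # Replace the attribute on the vLLM class at import time.
    SpeculativeConfig.__post_init__ = __post_init__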
