From 25c34fe39f14d299c41472e941c9a9c2a6dde29a Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 25 Mar 2026 15:38:23 +0800 Subject: [PATCH 1/4] [bugfix]fixed block_size incorrect setting issue in dsv3.2 Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- vllm_ascend/platform.py | 14 +++----------- vllm_ascend/utils.py | 12 ++++++------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index 61463a21fbb..1b686746d7a 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -183,17 +183,9 @@ def inference_mode(cls): @classmethod def update_block_size_for_backend(cls, vllm_config: VllmConfig) -> None: - cache_config = vllm_config.cache_config - if cache_config.user_specified_block_size: - # User specified --block-size; keep it. - return - model_config = vllm_config.model_config - if model_config is not None and model_config.is_hybrid: - # Hybrid attention+mamba models rely on the model-specific sizing - # logic rather than the generic platform default. - return - - super().update_block_size_for_backend(vllm_config) + # TODO: NPU still sets block_size in check_and_update_config. + # Move that logic here so block_size is chosen by the backend. + pass @classmethod def set_device(cls, device: torch.device): diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index 4db163c7e0b..8a1ff2202d2 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -1097,12 +1097,12 @@ def refresh_block_size(vllm_config): if not scheduler_config or not model_config: return - # TODO(MengqingCao): Remove the model_type check, after resolving the hidden error in get_kv_cache_groups. - if ( - "qwen3_next" not in model_config.hf_text_config.model_type - and "qwen3_5" not in model_config.hf_text_config.model_type - and cache_config.block_size != 128 - ): + if model_config.is_hybrid: + # Hybrid attention+mamba models rely on the model-specific sizing + # logic rather than the generic platform default. + return + + if cache_config.block_size != 128: if cache_config.enable_prefix_caching or scheduler_config.enable_chunked_prefill: logger.info("Block size is set to 128 if prefix cache or chunked prefill is enabled.") cache_config.block_size = 128 From baf0dd39badf05381b85958f826963e6e4a25b53 Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 25 Mar 2026 17:21:03 +0800 Subject: [PATCH 2/4] [bugfix]fixed block_size incorrect setting issue in dsv3.2 Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- vllm_ascend/utils.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index 8a1ff2202d2..f6651d8430a 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -1091,6 +1091,14 @@ def refresh_block_size(vllm_config): if not cache_config: return + if cache_config.user_specified_block_size: + # User specified --block-size; keep it. + if cache_config.block_size != 128: + logger.warning( + "The user specified --block-size and the value is not 128, which can lead to performance degradation" + ) + return + if cache_config.block_size is None: cache_config.block_size = 128 From e402ae17b8766d9107d9e2f5d40f754fe2c2cc19 Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 25 Mar 2026 18:02:30 +0800 Subject: [PATCH 3/4] fix ci Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- tests/ut/test_platform.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py index 914cd7f440b..9a755543813 100644 --- a/tests/ut/test_platform.py +++ b/tests/ut/test_platform.py @@ -293,6 +293,7 @@ def test_check_and_update_config_cache_config_block_size( vllm_config = TestNPUPlatform.mock_vllm_config() vllm_config.cache_config.block_size = None vllm_config.cache_config.enable_prefix_caching = True + vllm_config.cache_config.user_specified_block_size = False vllm_config.parallel_config.decode_context_parallel_size = 1 vllm_config.parallel_config.prefill_context_parallel_size = 1 vllm_config.parallel_config.tensor_parallel_size = 1 From c3ab3611a8395f992378c9de87eb41937de05d46 Mon Sep 17 00:00:00 2001 From: Wang Kunpeng <1289706727@qq.com> Date: Wed, 25 Mar 2026 18:36:19 +0800 Subject: [PATCH 4/4] [bugfix]fixed block_size incorrect setting issue in dsv3.2 Signed-off-by: Wang Kunpeng <1289706727@qq.com> --- tests/ut/test_platform.py | 1 - vllm_ascend/utils.py | 8 -------- 2 files changed, 9 deletions(-) diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py index 9a755543813..914cd7f440b 100644 --- a/tests/ut/test_platform.py +++ b/tests/ut/test_platform.py @@ -293,7 +293,6 @@ def test_check_and_update_config_cache_config_block_size( vllm_config = TestNPUPlatform.mock_vllm_config() vllm_config.cache_config.block_size = None vllm_config.cache_config.enable_prefix_caching = True - vllm_config.cache_config.user_specified_block_size = False vllm_config.parallel_config.decode_context_parallel_size = 1 vllm_config.parallel_config.prefill_context_parallel_size = 1 vllm_config.parallel_config.tensor_parallel_size = 1 diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py index f6651d8430a..8a1ff2202d2 100644 --- a/vllm_ascend/utils.py +++ b/vllm_ascend/utils.py @@ -1091,14 +1091,6 @@ def refresh_block_size(vllm_config): if not cache_config: return - if cache_config.user_specified_block_size: - # User specified --block-size; keep it. - if cache_config.block_size != 128: - logger.warning( - "The user specified --block-size and the value is not 128, which can lead to performance degradation" - ) - return - if cache_config.block_size is None: cache_config.block_size = 128