From 009d8a49b16b509b98c93e9cf1f1777fa7219995 Mon Sep 17 00:00:00 2001
From: lwq
Date: Tue, 12 Aug 2025 21:21:47 +0800
Subject: [PATCH 1/2] Delete duplicate code and fix a bug

Signed-off-by: lwq
---
 vllm_ascend/quantization/quant_config.py | 2 +-
 vllm_ascend/worker/model_runner_v1.py    | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py
index abd7625ec1..89a992616e 100644
--- a/vllm_ascend/quantization/quant_config.py
+++ b/vllm_ascend/quantization/quant_config.py
@@ -102,7 +102,7 @@ def get_quant_method(self, layer: torch.nn.Module,
         elif isinstance(layer, FusedMoE):
             if self.is_layer_skipped_ascend(prefix,
                                             self.packed_modules_mapping):
-                return AscendUnquantizedFusedMoEMethod(layer.moe)
+                return AscendUnquantizedFusedMoEMethod(layer.moe if hasattr(layer, 'moe') else None)
             return AscendFusedMoEMethod(self, prefix,
                                         self.packed_modules_mapping)
         elif isinstance(layer, VocabParallelEmbedding):
diff --git a/vllm_ascend/worker/model_runner_v1.py b/vllm_ascend/worker/model_runner_v1.py
index 594649c6d4..431509a885 100644
--- a/vllm_ascend/worker/model_runner_v1.py
+++ b/vllm_ascend/worker/model_runner_v1.py
@@ -1154,9 +1154,6 @@ def _process_reqs(
                                               attn_state,
                                               total_num_scheduled_tokens)
 
-        enable_dbo = self._check_dbo_is_valid(self.query_lens.tolist(),
-                                              attn_state,
-                                              total_num_scheduled_tokens)
         (padded_num_tokens_across_dp, num_tokens_across_dp, with_prefill,
          enable_dbo) = self._get_forward_metadata_across_dp_and_pad(
              total_num_scheduled_tokens, with_prefill, enable_dbo)

From 45b652f93b75ff8163500c21c9393f62065d45ef Mon Sep 17 00:00:00 2001
From: lwq
Date: Tue, 12 Aug 2025 21:39:08 +0800
Subject: [PATCH 2/2] Delete duplicate code and fix a bug

Signed-off-by: lwq
---
 vllm_ascend/quantization/quant_config.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py
index 89a992616e..b495e475ed 100644
--- a/vllm_ascend/quantization/quant_config.py
+++ b/vllm_ascend/quantization/quant_config.py
@@ -102,7 +102,8 @@ def get_quant_method(self, layer: torch.nn.Module,
         elif isinstance(layer, FusedMoE):
             if self.is_layer_skipped_ascend(prefix,
                                             self.packed_modules_mapping):
-                return AscendUnquantizedFusedMoEMethod(layer.moe if hasattr(layer, 'moe') else None)
+                return AscendUnquantizedFusedMoEMethod(
+                    layer.moe if hasattr(layer, 'moe') else None)
             return AscendFusedMoEMethod(self, prefix,
                                         self.packed_modules_mapping)
         elif isinstance(layer, VocabParallelEmbedding):
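
Note on the quant_config.py hunks: the fix replaces an unconditional layer.moe access with a guarded one, so a FusedMoE layer that never set a moe attribute no longer raises AttributeError when the layer is skipped for quantization. A minimal standalone sketch of that pattern follows; the class and function names are illustrative only, not the real vllm_ascend APIs.

    # Illustrative sketch only -- not the actual vllm_ascend classes.
    class UnquantizedFusedMoEMethodSketch:
        def __init__(self, moe_config=None):
            # moe_config stays None when the layer does not expose `moe`.
            self.moe_config = moe_config

    def build_unquantized_method(layer):
        # Equivalent to: layer.moe if hasattr(layer, 'moe') else None
        return UnquantizedFusedMoEMethodSketch(getattr(layer, "moe", None))

    class _LayerWithoutMoe:
        # Hypothetical layer that lacks a `moe` attribute.
        pass

    assert build_unquantized_method(_LayerWithoutMoe()).moe_config is None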