
Commit dd04a96

[Bugfix] Fix the bug of incorrect precision (#2479)
### What this PR does / why we need it?

Fix a bug that produced incorrect precision: the quantizer's `forward_oot` patch targeted `vllm.model_executor.layers.layernorm.RMSNorm`; it now targets `vllm_ascend.ops.layernorm.AscendRMSNorm`, so the quantization wrapper is applied to the method that is actually dispatched on Ascend.

- vLLM version: v0.10.0
- vLLM main: vllm-project/vllm@5341565

Signed-off-by: weiguihua2 <[email protected]>
1 parent: f0be3ee

File tree

1 file changed: +2 −2 lines changed


vllm_ascend/quantization/quantizer.py

Lines changed: 2 additions & 2 deletions
@@ -75,8 +75,8 @@ def __init__(self, quant_description):
             "vllm.model_executor.layers.layernorm.RMSNorm", "__init__",
             [wrapper_rmsnorm_init])
         VLLMAscendQuantizer.apply_patch(
-            "vllm.model_executor.layers.layernorm.RMSNorm",
-            "forward_oot", [wrapper_rmsnorm_forward_oot])
+            "vllm_ascend.ops.layernorm.AscendRMSNorm", "forward_oot",
+            [wrapper_rmsnorm_forward_oot])
         VLLMAscendQuantizer.apply_patch(
             "vllm.model_executor.layers.vocab_parallel_embedding.VocabParallelEmbedding",
             "__init__", [wrapper_vocab_parallel_embedding_init])
