[Main][Refactor]Change ASCEND_QUATIZATION_METHOD to ASCEND_QUANTIZATION_METHOD (#2517)

zhanghw0354 · zhanghaiwen · web-flow · commit b3fdd78a6b6f · 2025-08-26T09:06:16.000+08:00
### What this PR does / why we need it?

The constant `ASCEND_QUATIZATION_METHOD` in `vllm_ascend/utils.py` is misspelled and should be corrected to `ASCEND_QUANTIZATION_METHOD`.

### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

CI passed with newly added and existing tests.

- vLLM version: v0.10.1.1
- vLLM main: vllm-project/vllm@c9abb10

Signed-off-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
Co-authored-by: zhanghaiwen <zhanghaiwen@cmss.chinamobile.com>
diff --git a/tests/ut/quantization/test_quant_config.py b/tests/ut/quantization/test_quant_config.py
@@ -10,8 +10,7 @@
 from tests.ut.base import TestBase
 from vllm_ascend.quantization.quant_config import (AscendKVCacheMethod,
                                                    AscendQuantConfig)
-
-ASCEND_QUATIZATION_METHOD = "ascend"
+from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
 
 
 class TestAscendQuantConfig(TestBase):
@@ -42,7 +41,7 @@ def test_repr(self):
 
     def test_get_name(self):
         self.assertEqual(AscendQuantConfig.get_name(),
-                         ASCEND_QUATIZATION_METHOD)
+                         ASCEND_QUANTIZATION_METHOD)
 
     def test_get_supported_act_dtypes(self):
         supported_dtypes = AscendQuantConfig.get_supported_act_dtypes()
@@ -66,7 +65,7 @@ def test_override_quantization_method(self, mock_is_available):
         # Test when NPU is available
         mock_is_available.return_value = True
         result = AscendQuantConfig.override_quantization_method(None, None)
-        self.assertEqual(result, ASCEND_QUATIZATION_METHOD)
+        self.assertEqual(result, ASCEND_QUANTIZATION_METHOD)
 
         # Test when NPU is not available
         mock_is_available.return_value = False
diff --git a/tests/ut/test_platform.py b/tests/ut/test_platform.py
@@ -12,7 +12,7 @@
 
 from tests.ut.base import TestBase
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD
+from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
 
 
 class TestNPUPlatform(TestBase):
@@ -43,7 +43,7 @@ def test_class_variables(self):
                          "ASCEND_RT_VISIBLE_DEVICES")
         self.assertEqual(NPUPlatform.dispatch_key, "PrivateUse1")
         self.assertEqual(NPUPlatform.supported_quantization,
-                         [ASCEND_QUATIZATION_METHOD])
+                         [ASCEND_QUANTIZATION_METHOD])
 
     def test_is_sleep_mode_available(self):
         self.assertTrue(self.platform.is_sleep_mode_available())
@@ -61,7 +61,7 @@ def test_pre_register_and_update_with_parser(self, mock_quant_config,
 
         mock_adapt_patch.assert_called_once_with(is_global_patch=True)
 
-        self.assertTrue(ASCEND_QUATIZATION_METHOD in mock_action.choices)
+        self.assertTrue(ASCEND_QUANTIZATION_METHOD in mock_action.choices)
         self.assertEqual(len(mock_action.choices), 3)  # original 2 + ascend
 
     @patch("vllm_ascend.utils.adapt_patch")
@@ -89,7 +89,7 @@ def test_pre_register_and_update_with_existing_ascend_quant(
             self, mock_quant_config, mock_adapt_patch):
         mock_parser = MagicMock()
         mock_action = MagicMock()
-        mock_action.choices = ["awq", ASCEND_QUATIZATION_METHOD]
+        mock_action.choices = ["awq", ASCEND_QUANTIZATION_METHOD]
         mock_parser._option_string_actions = {"--quantization": mock_action}
 
         self.platform.pre_register_and_update(mock_parser)
diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py
@@ -28,7 +28,7 @@
 
 from vllm_ascend.ascend_config import (check_ascend_config, get_ascend_config,
                                        init_ascend_config)
-from vllm_ascend.utils import (ASCEND_QUATIZATION_METHOD, is_310p,
+from vllm_ascend.utils import (ASCEND_QUANTIZATION_METHOD, is_310p,
                                update_aclgraph_sizes)
 
 if TYPE_CHECKING:
@@ -50,7 +50,7 @@ class NPUPlatform(Platform):
     device_control_env_var: str = "ASCEND_RT_VISIBLE_DEVICES"
     dispatch_key: str = "PrivateUse1"
 
-    supported_quantization: list[str] = [ASCEND_QUATIZATION_METHOD]
+    supported_quantization: list[str] = [ASCEND_QUANTIZATION_METHOD]
 
     def is_sleep_mode_available(self) -> bool:
         return True
@@ -70,8 +70,8 @@ def pre_register_and_update(cls,
             quant_action = parser._option_string_actions.get('--quantization')
             if quant_action and hasattr(quant_action,
                                         'choices') and quant_action.choices:
-                if ASCEND_QUATIZATION_METHOD not in quant_action.choices:
-                    quant_action.choices.append(ASCEND_QUATIZATION_METHOD)
+                if ASCEND_QUANTIZATION_METHOD not in quant_action.choices:
+                    quant_action.choices.append(ASCEND_QUANTIZATION_METHOD)
 
         from vllm_ascend.quantization.quant_config import \
             AscendQuantConfig  # noqa: F401
diff --git a/vllm_ascend/quantization/quant_config.py b/vllm_ascend/quantization/quant_config.py
@@ -36,12 +36,12 @@
 from vllm.model_executor.utils import set_weight_attrs
 
 from vllm_ascend.ops.fused_moe import AscendUnquantizedFusedMoEMethod
-from vllm_ascend.utils import ASCEND_QUATIZATION_METHOD
+from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD
 
 from .quantizer import AscendQuantizer
 
 
-@register_quantization_config(ASCEND_QUATIZATION_METHOD)
+@register_quantization_config(ASCEND_QUANTIZATION_METHOD)
 class AscendQuantConfig(QuantizationConfig):
     """Config class for Ascend
 
@@ -57,7 +57,7 @@ def __repr__(self) -> str:
 
     @classmethod
     def get_name(cls) -> str:
-        return ASCEND_QUATIZATION_METHOD
+        return ASCEND_QUANTIZATION_METHOD
 
     @classmethod
     def get_supported_act_dtypes(cls) -> List[torch.dtype]:
@@ -80,7 +80,7 @@ def from_config(cls, config: Dict[str, Any]) -> "AscendQuantConfig":
     def override_quantization_method(cls, hf_quant_cfg,
                                      user_quant) -> Optional[str]:
         if torch.npu.is_available():
-            return ASCEND_QUATIZATION_METHOD
+            return ASCEND_QUANTIZATION_METHOD
         return None
 
     def get_quant_method(self, layer: torch.nn.Module,
diff --git a/vllm_ascend/utils.py b/vllm_ascend/utils.py
@@ -45,7 +45,7 @@
 # Maximum number of graphs that can be captured by ACL Graph
 MAX_CAPTURE_SIZE = 1920
 
-ASCEND_QUATIZATION_METHOD = "ascend"
+ASCEND_QUANTIZATION_METHOD = "ascend"
 SOC_VERSION_INFERENCE_SERIES = ["Ascend310P3"]
 
 ACL_FORMAT_FRACTAL_ND = 2