Skip to content

Commit 5d27a64

Browse files
committed
rename mxfp scale format transformation function
Signed-off-by: linfeng-yuan <1102311262@qq.com>
1 parent 41484f9 commit 5d27a64

File tree

2 files changed

+7
-5
lines changed

2 files changed

+7
-5
lines changed

vllm_ascend/device/device_op.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def npu_moe_init_routing(
5959
)
6060

6161
@staticmethod
62-
def normalize_mxfp8_scale_layout(scale: torch.Tensor | None) -> torch.Tensor | None:
62+
def maybe_normalize_mxfp_scale_layout(scale: torch.Tensor | None) -> torch.Tensor | None:
6363
return scale
6464

6565
@staticmethod
@@ -233,7 +233,7 @@ def npu_moe_init_routing(
233233
)
234234

235235
@staticmethod
236-
def normalize_mxfp8_scale_layout(scale: torch.Tensor | None) -> torch.Tensor | None:
236+
def maybe_normalize_mxfp_scale_layout(scale: torch.Tensor | None) -> torch.Tensor | None:
237237
if scale is None or scale.ndim != 2:
238238
return scale
239239
if scale.shape[-1] % 2 != 0:
@@ -291,7 +291,7 @@ def npu_dynamic_quant(
291291
if dynamic_scale is None:
292292
hidden_states, dynamic_scale = torch_npu.npu_dynamic_mx_quant(hidden_states, dst_type=act_quant_type)
293293

294-
return hidden_states, A5DeviceAdaptor.normalize_mxfp8_scale_layout(dynamic_scale)
294+
return hidden_states, A5DeviceAdaptor.maybe_normalize_mxfp_scale_layout(dynamic_scale)
295295

296296
@staticmethod
297297
def npu_grouped_matmul_swiglu_quant(
@@ -328,7 +328,7 @@ def npu_grouped_matmul_swiglu_quant(
328328
weight_scale_dtype=FLOAT8_E8M0FNU_DTYPE,
329329
x_scale_dtype=FLOAT8_E8M0FNU_DTYPE,
330330
)
331-
return out, A5DeviceAdaptor.normalize_mxfp8_scale_layout(out_scale), None
331+
return out, A5DeviceAdaptor.maybe_normalize_mxfp_scale_layout(out_scale), None
332332

333333
@staticmethod
334334
def get_quant_gmm2_kwargs(

vllm_ascend/ops/fused_moe/moe_mlp.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,9 @@ def quant_apply_mlp(
128128
quantized_hidden_states = None
129129
else:
130130
unquantized_hidden_states = None
131-
pertoken_scale = DeviceOperator.normalize_mxfp8_scale_layout(dynamic_scale) if use_mxfp_quant else dynamic_scale
131+
pertoken_scale = (
132+
DeviceOperator.maybe_normalize_mxfp_scale_layout(dynamic_scale) if use_mxfp_quant else dynamic_scale
133+
)
132134
quantized_hidden_states = hidden_states
133135

134136
bias1, bias2 = None, None

0 commit comments

Comments (0)