
Commit 61605d4

style: format code with pre-commit hooks
Signed-off-by: Anionex <[email protected]>
1 parent dc88722 commit 61605d4

7 files changed (+106, -71 lines)
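
Every hunk in this commit is whitespace-only reformatting; no logic changes. The layout the hooks converge on matches yapf's continuation style (an assumption, since the commit message does not name the formatter): when a call no longer fits on one line, arguments are split one per line and aligned under the opening parenthesis. A minimal before/after sketch in Python, modeled on a call from the test diffs below:

    # Before: several arguments share each continuation line.
    params = method.get_pergroup_param(8, 32, torch.bfloat16,
                                       layer_type="column")

    # After: one argument per line, aligned under the opening parenthesis.
    params = method.get_pergroup_param(8,
                                       32,
                                       torch.bfloat16,
                                       layer_type="column")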

tests/ut/quantization/test_w4a8_dynamic.py

Lines changed: 37 additions & 27 deletions
@@ -16,10 +16,9 @@ def setUp(self, mock_get_current_vllm_config, mock_get_tp_world_size):
         mock_vllm_config = Mock()
         mock_vllm_config.quant_config = Mock(
             quant_description={"group_size": 256})
-        mock_vllm_config.scheduler_config = Mock(
-            max_num_batched_tokens=2048,
-            max_model_len=2048,
-            enable_chunked_prefill=False)
+        mock_vllm_config.scheduler_config = Mock(max_num_batched_tokens=2048,
+                                                 max_model_len=2048,
+                                                 enable_chunked_prefill=False)
         mock_get_current_vllm_config.return_value = mock_vllm_config
         self.method = AscendW4A8DynamicLinearMethod()
         self.method.group_size = 8

@@ -48,11 +47,15 @@ def test_get_pergroup_param(self):
         self.assertEqual(params["weight_offset_second"].shape, (32, 1))
         # new quant version weight
         self.method.new_quant_version = True
-        params = self.method.get_pergroup_param(8, 32, torch.bfloat16,
+        params = self.method.get_pergroup_param(8,
+                                                32,
+                                                torch.bfloat16,
                                                 layer_type="column")
         self.assertEqual(params["scale_bias"].dtype, torch.float32)
         self.assertEqual(params["scale_bias"].shape, (32, 1))
-        params = self.method.get_pergroup_param(8, 32, torch.bfloat16,
+        params = self.method.get_pergroup_param(8,
+                                                32,
+                                                torch.bfloat16,
                                                 layer_type="row")
         self.assertEqual(params["scale_bias"].dtype, torch.float32)
         self.assertEqual(params["scale_bias"].shape, (32, 16))

@@ -61,23 +64,27 @@ def test_get_pergroup_param(self):
     @patch('torch.Tensor.npu')
     def test_process_weights_after_loading(self, mock_npu,
                                            mock_npu_convert_weight):
-        mock_npu.side_effect = lambda: torch.zeros((1, 32), dtype=torch.float32)
+        mock_npu.side_effect = lambda: torch.zeros(
+            (1, 32), dtype=torch.float32)
         mock_npu_convert_weight.return_value = torch.zeros((32, 4),
-                                                           dtype=torch.int32)
+                                                            dtype=torch.int32)
         # old quant version weight
         layer = torch.nn.Module()
-        layer.weight = torch.nn.Parameter(torch.zeros((32, 8), dtype=torch.int8),
+        layer.weight = torch.nn.Parameter(torch.zeros((32, 8),
+                                                      dtype=torch.int8),
                                           requires_grad=False)
-        layer.weight_scale = torch.nn.Parameter(torch.ones((32, 1),
-                                                           dtype=torch.float32),
+        layer.weight_scale = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
                                                 requires_grad=False)
-        layer.weight_offset = torch.nn.Parameter(
-            torch.empty_like(layer.weight_scale.data), requires_grad=False)
-        layer.weight_scale_second = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
-        layer.weight_offset_second = torch.nn.Parameter(
-            torch.empty_like(layer.weight_scale_second.data),
-            requires_grad=False)
+        layer.weight_offset = torch.nn.Parameter(torch.empty_like(
+            layer.weight_scale.data),
+                                                 requires_grad=False)
+        layer.weight_scale_second = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                       requires_grad=False)
+        layer.weight_offset_second = torch.nn.Parameter(torch.empty_like(
+            layer.weight_scale_second.data),
+                                                        requires_grad=False)
         self.method.process_weights_after_loading(layer)
         self.assertTrue(hasattr(layer, "weight_scale_bias"))
         self.assertEqual(layer.weight_scale_bias.data.shape, (32, ))

@@ -86,19 +93,22 @@ def test_process_weights_after_loading(self, mock_npu,
         self.method.new_quant_version = True
         new_layer = torch.nn.Module()
         new_layer.weight = torch.nn.Parameter(torch.zeros((16, 8),
-                                                          dtype=torch.int8),
+                                                           dtype=torch.int8),
                                               requires_grad=False)
-        new_layer.weight_scale = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
-        new_layer.weight_offset = torch.nn.Parameter(
-            torch.empty_like(new_layer.weight_scale.data), requires_grad=False)
-        new_layer.weight_scale_second = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
+        new_layer.weight_scale = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                    requires_grad=False)
+        new_layer.weight_offset = torch.nn.Parameter(torch.empty_like(
+            new_layer.weight_scale.data),
+                                                     requires_grad=False)
+        new_layer.weight_scale_second = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                           requires_grad=False)
         new_layer.weight_offset_second = torch.nn.Parameter(
             torch.empty_like(new_layer.weight_scale_second.data),
             requires_grad=False)
-        new_layer.scale_bias = torch.nn.Parameter(torch.zeros((32, 1),
-                                                              dtype=torch.float32),
+        new_layer.scale_bias = torch.nn.Parameter(torch.zeros(
+            (32, 1), dtype=torch.float32),
                                                   requires_grad=False)
         self.method.process_weights_after_loading(new_layer)
         self.assertEqual(new_layer.scale_bias.data.shape, (32, ))

tests/ut/torchair/quantization/test_torchair_w4a8_dynamic.py

Lines changed: 37 additions & 24 deletions
@@ -11,7 +11,9 @@
 class TestAscendW4A8DynamicLinearMethod(TestBase):

     @patch('vllm.distributed.get_tensor_model_parallel_world_size')
-    @patch('vllm_ascend.torchair.quantization.torchair_w4a8_dynamic.get_current_vllm_config')
+    @patch(
+        'vllm_ascend.torchair.quantization.torchair_w4a8_dynamic.get_current_vllm_config'
+    )
     def setUp(self, mock_get_current_vllm_config, mock_get_tp_world_size):
         mock_get_tp_world_size.return_value = 1
         mock_vllm_config = Mock()

@@ -45,11 +47,15 @@ def test_get_pergroup_param(self):
         self.assertEqual(params["weight_offset_second"].shape, (32, 1))
         # new quant version weight
         self.method.new_quant_version = True
-        params = self.method.get_pergroup_param(8, 32, torch.bfloat16,
+        params = self.method.get_pergroup_param(8,
+                                                32,
+                                                torch.bfloat16,
                                                 layer_type="column")
         self.assertEqual(params["scale_bias"].dtype, torch.float32)
         self.assertEqual(params["scale_bias"].shape, (32, 1))
-        params = self.method.get_pergroup_param(8, 32, torch.bfloat16,
+        params = self.method.get_pergroup_param(8,
+                                                32,
+                                                torch.bfloat16,
                                                 layer_type="row")
         self.assertEqual(params["scale_bias"].dtype, torch.float32)
         self.assertEqual(params["scale_bias"].shape, (32, 16))

@@ -58,23 +64,27 @@ def test_get_pergroup_param(self):
     @patch('torch.Tensor.npu')
     def test_process_weights_after_loading(self, mock_npu,
                                            mock_npu_convert_weight):
-        mock_npu.side_effect = lambda: torch.zeros((1, 32), dtype=torch.float32)
+        mock_npu.side_effect = lambda: torch.zeros(
+            (1, 32), dtype=torch.float32)
         mock_npu_convert_weight.return_value = torch.zeros((32, 4),
-                                                           dtype=torch.int32)
+                                                            dtype=torch.int32)
         # old quant version weight
         layer = torch.nn.Module()
-        layer.weight = torch.nn.Parameter(torch.zeros((32, 8), dtype=torch.int8),
+        layer.weight = torch.nn.Parameter(torch.zeros((32, 8),
+                                                      dtype=torch.int8),
                                           requires_grad=False)
-        layer.weight_scale = torch.nn.Parameter(torch.ones((32, 1),
-                                                           dtype=torch.float32),
+        layer.weight_scale = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
                                                 requires_grad=False)
-        layer.weight_offset = torch.nn.Parameter(
-            torch.empty_like(layer.weight_scale.data), requires_grad=False)
-        layer.weight_scale_second = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
-        layer.weight_offset_second = torch.nn.Parameter(
-            torch.empty_like(layer.weight_scale_second.data),
-            requires_grad=False)
+        layer.weight_offset = torch.nn.Parameter(torch.empty_like(
+            layer.weight_scale.data),
+                                                 requires_grad=False)
+        layer.weight_scale_second = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                       requires_grad=False)
+        layer.weight_offset_second = torch.nn.Parameter(torch.empty_like(
+            layer.weight_scale_second.data),
+                                                        requires_grad=False)
         self.method.process_weights_after_loading(layer)
         self.assertTrue(hasattr(layer, "weight_scale_bias"))
         self.assertEqual(layer.weight_scale_bias.data.shape, (32, ))

@@ -83,19 +93,22 @@ def test_process_weights_after_loading(self, mock_npu,
         self.method.new_quant_version = True
         new_layer = torch.nn.Module()
         new_layer.weight = torch.nn.Parameter(torch.zeros((16, 8),
-                                                          dtype=torch.int8),
+                                                           dtype=torch.int8),
                                               requires_grad=False)
-        new_layer.weight_scale = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
-        new_layer.weight_offset = torch.nn.Parameter(
-            torch.empty_like(new_layer.weight_scale.data), requires_grad=False)
-        new_layer.weight_scale_second = torch.nn.Parameter(
-            torch.ones((32, 1), dtype=torch.float32), requires_grad=False)
+        new_layer.weight_scale = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                    requires_grad=False)
+        new_layer.weight_offset = torch.nn.Parameter(torch.empty_like(
+            new_layer.weight_scale.data),
+                                                     requires_grad=False)
+        new_layer.weight_scale_second = torch.nn.Parameter(torch.ones(
+            (32, 1), dtype=torch.float32),
+                                                           requires_grad=False)
         new_layer.weight_offset_second = torch.nn.Parameter(
             torch.empty_like(new_layer.weight_scale_second.data),
             requires_grad=False)
-        new_layer.scale_bias = torch.nn.Parameter(torch.zeros((32, 1),
-                                                              dtype=torch.float32),
+        new_layer.scale_bias = torch.nn.Parameter(torch.zeros(
+            (32, 1), dtype=torch.float32),
                                                   requires_grad=False)
         self.method.process_weights_after_loading(new_layer)
         self.assertEqual(new_layer.scale_bias.data.shape, (32, ))

vllm_ascend/quantization/quant_config.py

Lines changed: 8 additions & 5 deletions
@@ -288,13 +288,16 @@ def create_weights(
             layer.register_parameter(perchannel_name, param)
             set_weight_attrs(param, extra_weight_attrs)

-        # NOTE: In w4a8 quantization implementation, 
-        # for down_proj and o_proj scale_bias shape is [output_size, 16], 
+        # NOTE: In w4a8 quantization implementation,
+        # for down_proj and o_proj scale_bias shape is [output_size, 16],
         # others are [output_size, 1]
-        layer_type = "row" if isinstance(layer, RowParallelLinear) else "others"
-
+        layer_type = "row" if isinstance(layer,
+                                         RowParallelLinear) else "others"
+
         pergroup_dict = self.quant_method.get_pergroup_param(
-            input_size_per_partition, output_size_per_partition, params_dtype,
+            input_size_per_partition,
+            output_size_per_partition,
+            params_dtype,
             layer_type=layer_type)
         for pergroup_name, pergroup_param in pergroup_dict.items():
             param = torch.nn.Parameter(pergroup_param, requires_grad=False)

vllm_ascend/quantization/w4a8_dynamic.py

Lines changed: 10 additions & 8 deletions
@@ -36,14 +36,14 @@ class AscendW4A8DynamicLinearMethod:

     def __init__(self):
         self.transpose_weight = True
-        
+
         vllm_config = get_current_vllm_config()
         self.group_size = vllm_config.quant_config.quant_description.get(
             "group_size", 256)
         quant_version = vllm_config.quant_config.quant_description.get(
             "version", "0")
         self.new_quant_version = quant_version == "1.0.0"
-        
+
         from vllm.distributed import get_tensor_model_parallel_world_size
         self.tp_size = get_tensor_model_parallel_world_size()

@@ -83,8 +83,10 @@ def get_perchannel_param(output_size: int,
                              params_dtype: torch.dtype) -> Dict[str, Any]:
         return {}

-    def get_pergroup_param(self, input_size: int, output_size: int,
-                           params_dtype: torch.dtype,
+    def get_pergroup_param(self,
+                           input_size: int,
+                           output_size: int,
+                           params_dtype: torch.dtype,
                            layer_type: Optional[str] = None) -> Dict[str, Any]:
         """
         Create per-group quantization parameters.

@@ -105,12 +107,12 @@ def get_pergroup_param(self, input_size: int, output_size: int,
                                                   self.group_size,
                                                   dtype=params_dtype)

-        # NOTE: In w4a8 quantization implementation, 
-        # for down_proj and o_proj(layer_type == "row") scale_bias shape is [output_size, 16], 
+        # NOTE: In w4a8 quantization implementation,
+        # for down_proj and o_proj(layer_type == "row") scale_bias shape is [output_size, 16],
         # others are [output_size, 1]
         if self.new_quant_version:
             scale_bias_dim = 16 if layer_type == "row" else 1
-            
+
             params_dict["scale_bias"] = torch.empty(output_size,
                                                     scale_bias_dim,
                                                     dtype=torch.float32)

@@ -147,7 +149,7 @@ def process_scale_second(weight: torch.Tensor,
         weight_high = weight_high.reshape(k, n)
         bias = 8 * (weight_high.to(torch.float32) * scale).sum(dim=0)
         # NOTE: scale_bias is not used currently
-        # because in msmodelslim w4a8 uses symmetric quantization 
+        # because in msmodelslim w4a8 uses symmetric quantization

         # TODO: support potential future asymmetric quantization
         antiquant_scale = (scale * per_group_scale).reshape(group_num, n)
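
The one piece of semantics restated in this file is the NOTE above: under the new quant version, row-parallel layers (down_proj, o_proj) allocate scale_bias with shape [output_size, 16], all other layers with shape [output_size, 1]. A minimal self-contained sketch of that shape rule (make_scale_bias is a hypothetical helper name; the real logic sits inline in get_pergroup_param):

    import torch

    def make_scale_bias(output_size: int, layer_type: str) -> torch.Tensor:
        # Row-parallel layers (down_proj / o_proj) carry 16 bias slots
        # per output channel; every other layer carries one.
        scale_bias_dim = 16 if layer_type == "row" else 1
        return torch.empty(output_size, scale_bias_dim, dtype=torch.float32)

    assert make_scale_bias(32, "row").shape == (32, 16)
    assert make_scale_bias(32, "column").shape == (32, 1)

These are exactly the shapes asserted by the two unit-test files above.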

vllm_ascend/quantization/w8a8.py

Lines changed: 3 additions & 1 deletion
@@ -85,7 +85,9 @@ def get_perchannel_param(
                                                       dtype=params_dtype)
         return params_dict

-    def get_pergroup_param(self, input_size: int, output_size: int,
+    def get_pergroup_param(self,
+                           input_size: int,
+                           output_size: int,
                            params_dtype: torch.dtype,
                            layer_type: Optional[str] = None) -> Dict[str, Any]:
         return {}

vllm_ascend/quantization/w8a8_dynamic.py

Lines changed: 3 additions & 1 deletion
@@ -62,7 +62,9 @@ def get_perchannel_param(
                                                       dtype=params_dtype)
         return params_dict

-    def get_pergroup_param(self, input_size: int, output_size: int,
+    def get_pergroup_param(self,
+                           input_size: int,
+                           output_size: int,
                            params_dtype: torch.dtype,
                            layer_type: Optional[str] = None) -> Dict[str, Any]:
         return {}

vllm_ascend/torchair/quantization/torchair_w4a8_dynamic.py

Lines changed: 8 additions & 5 deletions
@@ -39,14 +39,14 @@ class TorchairAscendW4A8DynamicLinearMethod:

     def __init__(self):
         self.transpose_weight = True
-        
+
         vllm_config = get_current_vllm_config()
         self.group_size = vllm_config.quant_config.quant_description.get(
             "group_size", 256)
         quant_version = vllm_config.quant_config.quant_description.get(
             "version", "0")
         self.new_quant_version = quant_version == "1.0.0"
-        
+
         from vllm.distributed import get_tensor_model_parallel_world_size
         self.tp_size = get_tensor_model_parallel_world_size()

@@ -78,8 +78,10 @@ def get_perchannel_param(output_size: int,
                              params_dtype: torch.dtype) -> Dict[str, Any]:
         return {}

-    def get_pergroup_param(self, input_size: int, output_size: int,
-                           params_dtype: torch.dtype,
+    def get_pergroup_param(self,
+                           input_size: int,
+                           output_size: int,
+                           params_dtype: torch.dtype,
                            layer_type: Optional[str] = None) -> Dict[str, Any]:
         params_dict = {}
         params_dict["weight_scale"] = torch.empty(output_size,

@@ -166,7 +168,8 @@ def process_weights_after_loading(self, layer: torch.nn.Module):
         if self.new_quant_version:
             assert layer.weight.data.shape[-1] % 4 == 0, \
                 f"the last dim of weight needs to be divided by 4, got shape {layer.weight.data.shape}"
-            layer.weight.data = layer.weight.data.view(torch.int32).contiguous()
+            layer.weight.data = layer.weight.data.view(
+                torch.int32).contiguous()
         else:
             layer.weight.data = torch_npu.npu_convert_weight_to_int4pack(
                 layer.weight.data.to(torch.int32))
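
The rewrapped view call above is where the new quant format packs weights: Tensor.view(torch.int32) reinterprets every four int8 bytes as one int32 element, which is why the preceding assert requires the last dim to be divisible by 4. A small CPU-only sketch of that reinterpretation (the real path runs on NPU tensors):

    import torch

    w = torch.zeros((16, 8), dtype=torch.int8)  # last dim divisible by 4
    packed = w.view(torch.int32).contiguous()   # 4 int8 bytes -> 1 int32
    assert packed.shape == (16, 2)              # 8 int8 cols -> 2 int32 cols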
