# limitations under the License.

import math
+
import torch
import torch.nn as nn
+
from auto_round.utils import convert_dtype_torch2str, logger

try:
    import auto_round_kernel as ark
+
    ARK_INSTALLED = True
except:
    ARK_INSTALLED = False

AWQ_REVERSE_ORDER = [0, 4, 1, 5, 2, 6, 3, 7]

+
def unpack_awq(qweight: torch.Tensor, qzeros: torch.Tensor, bits: int):
    shifts = torch.arange(0, 32, bits, device="cpu")

@@ -51,6 +55,7 @@ def unpack_awq(qweight: torch.Tensor, qzeros: torch.Tensor, bits: int):

    return iweights, izeros

+
def reverse_awq_order(iweights: torch.Tensor, izeros: torch.Tensor, bits: int):
    reverse_order_tensor = torch.arange(
        iweights.shape[-1],
@@ -66,14 +71,13 @@ def reverse_awq_order(iweights: torch.Tensor, izeros: torch.Tensor, bits: int):
    iweights = iweights[:, reverse_order_tensor]
    return iweights, izeros

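# Illustration (a minimal sketch with hypothetical values, not part of the
# module): AWQ stores the eight sub-byte columns of each packed int32 word in
# the interleaved order [0, 2, 4, 6, 1, 3, 5, 7]; indexing with
# AWQ_REVERSE_ORDER, as reverse_awq_order does above, restores sequential order.
import torch

packed_order = torch.tensor([0, 2, 4, 6, 1, 3, 5, 7])  # on-disk column layout
print(packed_order[AWQ_REVERSE_ORDER])                  # tensor([0, 1, 2, 3, 4, 5, 6, 7])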
+
class QuantLinearAWQ(nn.Module):
    QUANT_TYPE = "ark_awq"

    def __init__(self, w_bit, group_size, in_features, out_features, bias, zero_point, dev):
        super().__init__()
-        assert (
-            ARK_INSTALLED
-        ), "Please install auto_round_kernel package."
+        assert ARK_INSTALLED, "Please install auto_round_kernel package."

        self.use_bf16 = ark.check_isa_supported("AMX")

@@ -162,9 +166,7 @@ def from_linear(cls, linear, w_bit, group_size, init_only=False, scales=None, ze

    @torch.no_grad()
    def forward(self, x):
-        assert ARK_INSTALLED, (
-            "ARK kernels could not be loaded. "
-        )
+        assert ARK_INSTALLED, "ARK kernels could not be loaded. "

        input_dtype = x.dtype
        out_shape = x.shape[:-1] + (self.out_features,)
@@ -200,6 +202,7 @@ def extra_repr(self) -> str:
            self.group_size,
        )

+
class QuantLinear(nn.Module):
    QUANT_TYPE = "ark_gptq_nozp"
    ZP_BIAS = 0
@@ -220,9 +223,7 @@ def __init__(

        if bits not in [2, 4, 8]:
            raise NotImplementedError("Only 2, 4, 8 bits are supported for ARK.")
-        assert (
-            ARK_INSTALLED
-        ), "Please install auto_round_kernel."
+        assert ARK_INSTALLED, "Please install auto_round_kernel."

        self.infeatures = infeatures
        self.outfeatures = outfeatures
@@ -261,9 +262,8 @@ def __init__(
        self.kernel_switch_threshold = kernel_switch_threshold
        self.trainable = trainable

-
    def post_init(self):
-        assert self.qweight.device.type in ["cpu", 'xpu']
+        assert self.qweight.device.type in ["cpu", "xpu"]
        # intweight: k x n, zeros: k / group_size x n
        intweight, zeros = unpack_to_8bit_signed(self.qweight, self.qzeros, self.bits, self.ZP_BIAS)
        if zeros is None:
@@ -275,7 +275,7 @@ def post_init(self):
            zeros = (zeros.to(torch.int32) - (2 ** (self.bits - 1))).to(torch.int8)
        else:
            zeros -= 2 ** (self.bits - 1)
-            if self.qweight.device.type != 'cpu':
+            if self.qweight.device.type != "cpu":
                assert not self.asym
            if not self.asym:
                intweight -= 2 ** (self.bits - 1)
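# Worked instance of the re-centering above (a sketch, bits value hypothetical):
# for bits=4, 2 ** (bits - 1) == 8, so unsigned codes 0..15 become the signed
# range -8..7 that the int8 kernels consume.
import torch

bits = 4
codes = torch.arange(2**bits, dtype=torch.int32)   # unpacked unsigned codes
signed = (codes - 2 ** (bits - 1)).to(torch.int8)
print(signed.min().item(), signed.max().item())    # -8 7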
@@ -285,21 +285,20 @@ def post_init(self):
        if self.asym:
            intweight = (intweight.to(torch.int32) - (2 ** (self.bits - 1))).to(torch.int8)

-
        logger.debug(
            f"ARK repack quantized weight: K:{intweight.shape[0]}, N:{intweight.shape[1]}, weight_dtype:{BITS_DTYPE_MAPPING[self.bits]}, scale_dtype:fp32, compute_dtype:fp32, group_size:{self.group_size}"
        )

-        if self.qweight.device.type == 'xpu':
-            self.sdt = 'fp16'
-            self.cdt = 'fp16'
+        if self.qweight.device.type == "xpu":
+            self.sdt = "fp16"
+            self.cdt = "fp16"
            scales = self.scales.to(torch.float16).contiguous()
        else:
-            self.sdt = 'fp32'
-            self.cdt = 'fp32'
+            self.sdt = "fp32"
+            self.cdt = "fp32"
            scales = self.scales.float().contiguous()
        self.wdt = BITS_DTYPE_MAPPING[self.bits]
-
+
        self.qweight = ark.repack_quantized_weight(
            intweight.contiguous(),
            scales,
@@ -311,11 +310,10 @@ def post_init(self):
            self.wdt,
            # scale_dtype
            self.sdt,
-
            self.asym,
            self.group_size,
        )
-
+
        # self.revert_wei = torch.zeros(self.infeatures, self.outfeatures, dtype=scales.dtype, device=self.qweight.device)
        # # print(packw, packw.device, packw.dtype)
        # ark.dequantize_packed_weight(
@@ -324,20 +322,20 @@ def post_init(self):
        self.qzeros = torch.empty(0)
        self.scales = torch.empty(0)
        if self.bias is not None:
-            if self.bias.device.type == 'cpu':
+            if self.bias.device.type == "cpu":
                self.bias = self.bias.to(torch.float32)
            else:
                self.bias = self.bias.to(torch.float16)

    def forward(self, x: torch.Tensor):
        raw_input_dtype = x.dtype
-        if x.device.type == 'cpu':
+        if x.device.type == "cpu":
            odt = torch.float32
            if raw_input_dtype != torch.float32:
                x = x.to(torch.float32)
        else:
            odt = x.dtype
-
+
        out_shape = x.shape[:-1] + (self.outfeatures,)
        x = x.view(-1, x.shape[-1])  # convert xd to 2d
        out_2d_shape = x.shape[:-1] + (self.outfeatures,)
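# A minimal sketch of the shape bookkeeping above (sizes hypothetical): the
# leading batch/sequence dims survive untouched and only the last dim changes,
# because the kernel consumes a 2-D GEMM input.
import torch

x = torch.randn(2, 7, 16)                  # (batch, seq, in_features)
outfeatures = 32
out_shape = x.shape[:-1] + (outfeatures,)
x2d = x.view(-1, x.shape[-1])              # (14, 16)
y = x2d @ torch.randn(16, outfeatures)     # stand-in for the quantized matmul
print(y.view(out_shape).shape)             # torch.Size([2, 7, 32])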
@@ -353,16 +351,18 @@ def forward(self, x: torch.Tensor):
            self.wdt,  # weight_dtype
            self.sdt,  # scale_dtype
            self.asym,
-            self.group_size
+            self.group_size,
        )
-        if x.device.type == 'xpu':
+        if x.device.type == "xpu":
            outputs = outputs + bias
        return outputs.to(raw_input_dtype).view(out_shape)

+
class QuantLinearGPTQ(QuantLinear):
    QUANT_TYPE = "ark_gptq"
    ZP_BIAS = 1

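# Illustration of the ZP_BIAS convention (an assumption inferred from the
# gptq_bias=1 default in unpack_to_8bit_signed below): GPTQ checkpoints store
# zero points with an off-by-one bias, so unpacking adds ZP_BIAS back, while
# the "nozp" variant (ZP_BIAS = 0) uses the stored value as-is.
stored_zero = 7              # hypothetical raw field read from qzeros
true_zero = stored_zero + 1  # ZP_BIAS = 1 restores the actual zero point, 8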
+
@torch.no_grad()
def unpack_to_8bit_signed(qweight, qzeros, bits, gptq_bias=1):
    wf = torch.tensor(list(range(0, 32, bits)), dtype=torch.int32, device=qweight.device).unsqueeze(0)
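# Sketch of the shift-and-mask unpacking that wf sets up (the packed word is
# hypothetical): for bits=4 the shifts are [0, 4, ..., 28], so each int32
# yields eight 4-bit fields, least-significant field first.
import torch

bits = 4
wf = torch.tensor(list(range(0, 32, bits)), dtype=torch.int32).unsqueeze(0)
packed = torch.tensor([[0x76543210]], dtype=torch.int32)
fields = torch.bitwise_right_shift(packed.unsqueeze(-1), wf) & (2**bits - 1)
print(fields.flatten().tolist())  # [0, 1, 2, 3, 4, 5, 6, 7]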
@@ -393,6 +393,7 @@ def unpack_to_8bit_signed(qweight, qzeros, bits, gptq_bias=1):

    return weight, zeros

+
# Copied from qlinear_marlin.py
@torch.no_grad()
def dequantize_weight(qweight, qzeros, scales, bits):
@@ -402,16 +403,18 @@ def dequantize_weight(qweight, qzeros, scales, bits):
    if unpacked_qzeros is not None:
        unpacked_qzeros = unpacked_qzeros.repeat_interleave(group_size, dim=0)
    else:
-        unpacked_qzeros = torch.full_like(scales, 8 if bits == 4 else 128, dtype=torch.int32, device=qweight.device)
+        unpacked_qzeros = torch.full_like(scales, 8 if bits == 4 else 128, dtype=torch.int32, device=qweight.device)
    unpacked_qweight = (unpacked_qweight - unpacked_qzeros) * scales

    return unpacked_qweight, unpacked_qzeros

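# Worked instance of the dequantization above, W = (q - z) * s, using
# hypothetical 4-bit codes, the default zero point 8, and a made-up scale.
import torch

q = torch.tensor([0, 8, 15], dtype=torch.int32)  # unpacked 4-bit codes
z, s = 8, 0.05                                   # zero point and scale
print(((q - z) * s).tolist())                    # ≈ [-0.4, 0.0, 0.35]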
+
def ark_post_init(model):
    for _, submodule in model.named_modules():
        if isinstance(submodule, QuantLinear):
            submodule.post_init()

    return model

-__all__ = ["QuantLinear", 'QuantLinearGPTQ', 'QuantLinearAWQ']
+
+__all__ = ["QuantLinear", "QuantLinearGPTQ", "QuantLinearAWQ"]
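# Hypothetical end-to-end wiring (the loader name is a placeholder, not an API
# of this module): once a model's Linear layers have been replaced by
# QuantLinear and the packed checkpoint is loaded, repack once for ARK:
#
#     model = load_packed_model("my-int4-checkpoint")  # user-provided loader
#     model = ark_post_init(model)  # each QuantLinear repacks its qweight
#     outputs = model(inputs)       # forward now dispatches to ark ops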