fix format

jiqing-feng · jiqing-feng · commit 314f724d6869 · 2024-11-19T13:28:30.000Z
diff --git a/bitsandbytes/backends/cpu_xpu_common.py b/bitsandbytes/backends/cpu_xpu_common.py
@@ -234,7 +234,9 @@ def mm_dequant_impl(
         out_shape = (out_shape[0] * out_shape[1], out_shape[2])
 
     if compute_dtype not in [torch.float32, torch.bfloat16]:
-        warnings.warn(f"mm_dequant_{A.device}: compute_dtype {compute_dtype} is not supported, will use bfloat16 instead")
+        warnings.warn(
+            f"mm_dequant_{A.device}: compute_dtype {compute_dtype} is not supported, will use bfloat16 instead"
+        )
         compute_dtype = torch.bfloat16
     A_reshaped = A.reshape(out_shape).to(compute_dtype)
     row_stats = row_stats.reshape(-1).unsqueeze(-1).to(compute_dtype)
@@ -439,9 +441,7 @@ def dequantize_4bit_impl(
         raise NotImplementedError("bnb_4bit_use_double_quant is not supported yet for CPU/XPU")
 
     if ipex_cpu_only and _ipex_cpu_version_prereq(2, 5) and getattr(quant_state, "ipex", False):
-        A = torch.ops.ipex_prepack.woq_linear_unpack_weight(
-                A, "nf4", quant_state.shape, 2
-            )
+        A = torch.ops.ipex_prepack.woq_linear_unpack_weight(A, "nf4", quant_state.shape, 2)
         quant_state.ipex = False
 
     # Map nf4 to [-1, 1]
@@ -466,9 +466,9 @@ def dequantize_4bit_impl(
         if out is None:
             out = torch.empty(quant_state.shape, dtype=quant_state.dtype, device=A.device)
         out_reshaped = out.reshape(-1)
-        out_reshaped[: n - rem] = (out_dq[: n - rem].view(-1, blocksize) * absmax[: blocks - has_rem].view(-1, 1)).reshape(
-            -1
-        )
+        out_reshaped[: n - rem] = (
+            out_dq[: n - rem].view(-1, blocksize) * absmax[: blocks - has_rem].view(-1, 1)
+        ).reshape(-1)
         out_reshaped[n - rem :] = out_dq[n - rem :] * absmax[-1]
     else:
         out = (out_dq.view(-1, blocksize) * absmax.view(-1, 1)).reshape(quant_state.shape).to(quant_state.dtype)
@@ -513,9 +513,20 @@ def gemm_4bit_impl(
         GEMM output tensor.
     """
     if getattr(state, "ipex", False):
-        output = torch.ops.torch_ipex.woq_linear(A, B, "nf4", state.shape,
-                    state.new_scales, state.new_zeros, None, None, state.blocksize,
-                    ipex_cpu.quantization.WoqLowpMode.BF16, 1, state.compensation)
+        output = torch.ops.torch_ipex.woq_linear(
+            A,
+            B,
+            "nf4",
+            state.shape,
+            state.new_scales,
+            state.new_zeros,
+            None,
+            None,
+            state.blocksize,
+            ipex_cpu.quantization.WoqLowpMode.BF16,
+            1,
+            state.compensation,
+        )
     else:
         dqB = dequantize_4bit_impl(B, state, blocksize=state.blocksize).t()
         output = torch.matmul(A, dqB.to(A.dtype))
diff --git a/bitsandbytes/backends/xpu.py b/bitsandbytes/backends/xpu.py
@@ -15,6 +15,8 @@
 )
 
 Tensor = torch.Tensor
+
+
 def assert_on_xpu(tensors):
     on_xpu = True
     for t in tensors:
@@ -124,7 +126,6 @@ def extract_outliers(
         output = A[:, idx].contiguous()
         return output
 
-
     def quantize_4bit(
         self,
         A: torch.Tensor,
@@ -155,7 +156,7 @@ def dequantize_4bit(
             blocksize = 64
         assert_on_xpu([A, absmax, out])
         if quant_type == "nf4":
-            output = torch.ops.torch_ipex.dequantize_4bit(A, "nf4", quant_state.shape, absmax, None,blocksize).t()
+            output = torch.ops.torch_ipex.dequantize_4bit(A, "nf4", quant_state.shape, absmax, None, blocksize).t()
         else:
             output = dequantize_4bit_impl(A, quant_state, absmax, out, blocksize, quant_type)
 
diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
@@ -1006,7 +1006,6 @@ def dequantize_fp4(
     out: Optional[torch.Tensor] = None,
     blocksize: Optional[int] = None,
 ) -> Tensor:
-
     return dequantize_4bit(A, quant_state, absmax, out, blocksize, "fp4")
 
 
@@ -1017,7 +1016,6 @@ def dequantize_nf4(
     out: Optional[torch.Tensor] = None,
     blocksize: Optional[int] = None,
 ) -> Tensor:
-
     return dequantize_4bit(A, quant_state, absmax, out, blocksize, "nf4")
 
 
diff --git a/bitsandbytes/nn/__init__.py b/bitsandbytes/nn/__init__.py
@@ -2,6 +2,7 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from ..backends import backends
 from .modules import (
     Embedding,
     Int8Params,
@@ -14,7 +15,7 @@
     StableEmbedding,
     SwitchBackLinearBnb,
 )
-from ..backends import backends
+
 # CPU and XPU backend do not need triton, and XPU does not support triton for now.
 if "xpu" not in backends.keys() or ("cpu" in backends.keys() and len(backends.keys()) == 1):
     from .triton_based_modules import (
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
@@ -449,10 +449,7 @@ def _save_to_state_dict(self, destination, prefix, keep_vars):
         save weight and bias,
         then fill state_dict with components of quant_state
         """
-        if (
-            getattr(self.weight, "quant_state", None) is not None
-            and getattr(self.weight.quant_state, "ipex", False)
-        ):
+        if getattr(self.weight, "quant_state", None) is not None and getattr(self.weight.quant_state, "ipex", False):
             if self.weight.device.type == "cpu":
                 original_weight = torch.ops.ipex_prepack.woq_linear_unpack_weight(
                     self.weight, "nf4", self.weight.quant_state.shape, 2
diff --git a/bitsandbytes/utils.py b/bitsandbytes/utils.py
@@ -201,35 +201,38 @@ def unpack_tensor_to_dict(tensor_data):
 
 
 def enable_ipex_fusion(linear):
-    from bitsandbytes.backends.cpu_xpu_common import _ipex_cpu_version_prereq, _ipex_xpu_version_prereq
-    from bitsandbytes.backends.cpu_xpu_common import ipex_cpu_only, ipex_xpu
+    from bitsandbytes.backends.cpu_xpu_common import (
+        _ipex_cpu_version_prereq,
+        _ipex_xpu_version_prereq,
+        ipex_cpu_only,
+        ipex_xpu,
+    )
 
     if ipex_cpu_only and _ipex_cpu_version_prereq(2, 5):
         quant_state = linear.weight.quant_state
-        new_weight, new_scales, new_zeros, _, compensation = \
-                torch.ops.ipex_prepack.woq_linear_pack_weight(
-                    linear.weight.data.reshape([quant_state.shape[0], quant_state.shape[1] // 2]),
-                    "nf4",
-                    quant_state.shape,  # weight shape
-                    quant_state.absmax.view(quant_state.shape[0], quant_state.shape[1] // quant_state.blocksize),  # scales
-                    None,  # zero_points
-                    None,  # bias
-                    None,  # batch_size
-                    quant_state.blocksize,
-                    2,
-                )
+        new_weight, new_scales, new_zeros, _, compensation = torch.ops.ipex_prepack.woq_linear_pack_weight(
+            linear.weight.data.reshape([quant_state.shape[0], quant_state.shape[1] // 2]),
+            "nf4",
+            quant_state.shape,  # weight shape
+            quant_state.absmax.view(quant_state.shape[0], quant_state.shape[1] // quant_state.blocksize),  # scales
+            None,  # zero_points
+            None,  # bias
+            None,  # batch_size
+            quant_state.blocksize,
+            2,
+        )
     elif ipex_xpu and _ipex_xpu_version_prereq(2, 5):
         quant_state = linear.weight.quant_state
         new_weight = linear.weight.data.reshape([quant_state.shape[0], quant_state.shape[1] // 2])
-        
+
         new_scales = quant_state.absmax.view(quant_state.shape[0], quant_state.shape[1] // quant_state.blocksize)
         new_zeros = None
         compensation = None
     linear.weight.data = new_weight.data
-    setattr(linear.weight.quant_state, "ipex", True)
-    setattr(linear.weight.quant_state, "new_scales", new_scales)
-    setattr(linear.weight.quant_state, "new_zeros", new_zeros)
-    setattr(linear.weight.quant_state, "compensation", compensation)
+    linear.weight.quant_state.ipex = True
+    linear.weight.quant_state.new_scales = new_scales
+    linear.weight.quant_state.new_zeros = new_zeros
+    linear.weight.quant_state.compensation = compensation
 
 
 class QuantState: