
Commit 7c87e01

chore: formatted quant_refactor

Signed-off-by: Brandon Groth <[email protected]>
1 parent 99fe7c7 commit 7c87e01

21 files changed: +260 −431 lines

fms_mo/quant/quantizers.py

10 additions & 4 deletions

@@ -510,7 +510,9 @@ def forward(
 
         if istraining:
             # only recalc clipvals under training mode
-            num_bits_int = num_bits.item() if isinstance(num_bits, torch.Tensor) else num_bits
+            num_bits_int = (
+                num_bits.item() if isinstance(num_bits, torch.Tensor) else num_bits
+            )
             SAWBcode_mapping = {8: 803, 4: 403, 2: 103}
             if num_bits in [2, 4, 8]:
                 sawb_code = SAWBcode_mapping[num_bits_int]
@@ -550,9 +552,13 @@ def forward(
                     clip_val.dtype
                 )  # NOTE return will be a fp32 tensor; function only support float()
             else:
-                output = torch.quantize_per_channel(
-                    input_tensor, scale, zero_point, 0, torch.qint8
-                ).int_repr().clamp(int_l, int_u)
+                output = (
+                    torch.quantize_per_channel(
+                        input_tensor, scale, zero_point, 0, torch.qint8
+                    )
+                    .int_repr()
+                    .clamp(int_l, int_u)
+                )
                 # NOTE return will be a torch.int8 tensor
 
         return output
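
For reference, a minimal standalone sketch of the per-channel path this hunk re-wraps, assuming fp32 input, per-channel scale/zero-point on dim 0, and qint8 bounds; the tensor values are illustrative, not from the repo:

import torch

input_tensor = torch.randn(3, 4)                # [channels, features]
scale = torch.tensor([0.1, 0.2, 0.3])           # one scale per channel (dim 0)
zero_point = torch.zeros(3, dtype=torch.int64)  # symmetric case: zero offsets
int_l, int_u = -128, 127                        # torch.qint8 integer bounds

# Quantize per channel, strip the quantized wrapper to raw int8, then clamp
# to the integer bounds, as the reformatted chain above does.
output = (
    torch.quantize_per_channel(input_tensor, scale, zero_point, 0, torch.qint8)
    .int_repr()
    .clamp(int_l, int_u)
)
print(output.dtype)  # torch.int8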

fms_mo/quant_refactor/base_quant.py

4 additions & 4 deletions

@@ -103,9 +103,7 @@ def __init__(
             assert Nch > 0, "Provided Nch is negative"
             self.Nch = Nch
         else:
-            raise RuntimeError(
-                "perCh was selected without specifying Nch."
-            )
+            raise RuntimeError("perCh was selected without specifying Nch.")
         if axis is not None and issubclass(type(axis), int):
             self.axis = axis
         else:
@@ -220,7 +218,9 @@ def __init__(
         self.align_zero = align_zero
         self.clipSTE = clipSTE
 
-        temp_clipvals = torch.ones(self.qscheme.Nch) if self.perCh else torch.Tensor([1.0])
+        temp_clipvals = (
+            torch.ones(self.qscheme.Nch) if self.perCh else torch.Tensor([1.0])
+        )
         self.register_parameter("clip_val", torch.nn.Parameter(temp_clipvals.clone()))
         # Keep clip_valn as positive 1.0 to allow simpler multiplication with
         # negative numbers (clip_valn.data *= clip_valn)
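
A minimal sketch of what the reformatted clip-value setup does, assuming a bare torch.nn.Module carrying perCh/Nch; DummyQuantizer below is a hypothetical stand-in for the Quantizer base class:

import torch

class DummyQuantizer(torch.nn.Module):
    def __init__(self, perCh: bool = True, Nch: int = 4):
        super().__init__()
        self.perCh = perCh
        # One learnable clip value per channel when perCh, else a single scalar.
        temp_clipvals = torch.ones(Nch) if self.perCh else torch.Tensor([1.0])
        self.register_parameter("clip_val", torch.nn.Parameter(temp_clipvals.clone()))

q = DummyQuantizer(perCh=True, Nch=4)
print(q.clip_val.shape)  # torch.Size([4])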

fms_mo/quant_refactor/get_quantizer_new.py

21 additions & 27 deletions

@@ -16,6 +16,7 @@
 Functions to create quantizers for activation and weights. Called from Qmodule level.
 """
 
+# Third Party
 import torch
 
 # Local
@@ -38,15 +39,15 @@
 
 
 def get_activation_quantizer_new(
-    qa_mode:str="PACT",
-    nbits:int=32,
-    clip_val:torch.FloatTensor=None,
-    clip_valn:torch.FloatTensor=None,
-    non_neg:bool=False,
-    align_zero:bool=True, # pylint: disable=unused-argument
-    extend_act_range:bool=False,
-    use_PT_native_Qfunc:bool=False,
-    use_subnormal:bool=False,
+    qa_mode: str = "PACT",
+    nbits: int = 32,
+    clip_val: torch.FloatTensor = None,
+    clip_valn: torch.FloatTensor = None,
+    non_neg: bool = False,
+    align_zero: bool = True,  # pylint: disable=unused-argument
+    extend_act_range: bool = False,
+    use_PT_native_Qfunc: bool = False,
+    use_subnormal: bool = False,
 ):
     """Return a quantizer for activation quantization
     Regular quantizers:
@@ -212,16 +213,16 @@ def get_activation_quantizer_new(
 
 
 def get_weight_quantizer_new(
-    qw_mode:str="SAWB+",
-    nbits:int=32,
-    clip_val:torch.FloatTensor=None,
-    clip_valn:torch.FloatTensor=None,
-    align_zero:bool=True,
-    w_shape:torch.Size=None,
-    recompute:bool=False, # pylint: disable=unused-argument
-    perGp:int=None,
-    use_PT_native_Qfunc:bool=False,
-    use_subnormal:bool=False,
+    qw_mode: str = "SAWB+",
+    nbits: int = 32,
+    clip_val: torch.FloatTensor = None,
+    clip_valn: torch.FloatTensor = None,
+    align_zero: bool = True,
+    w_shape: torch.Size = None,
+    recompute: bool = False,  # pylint: disable=unused-argument
+    perGp: int = None,
+    use_PT_native_Qfunc: bool = False,
+    use_subnormal: bool = False,
 ):
     """Return a quantizer for weight quantization
     Regular quantizers:
@@ -236,13 +237,7 @@ def get_weight_quantizer_new(
     Ngrp = (
         [w_shape[0] * w_shape[1] // perGp, perGp] if "perGp" in qw_mode else False
    )  # store clip_val size and group size
-    unit = (
-        "perCh"
-        if Nch is not False
-        else "perGrp"
-        if perGp is not None
-        else "perT"
-    )
+    unit = "perCh" if Nch is not False else "perGrp" if perGp is not None else "perT"
     if "sawb" in qw_mode:
         clipSTE = "+" in qw_mode
         weight_quantizer = SAWB_new(
@@ -260,7 +255,6 @@ def get_weight_quantizer_new(
             use_PT_native_Qfunc=use_PT_native_Qfunc,
         )
     elif "max" in qw_mode:
-
         weight_quantizer = Qmax_new(
             nbits,
             Qscheme=Qscheme(

fms_mo/quant_refactor/linear_utils.py

10 additions & 8 deletions

@@ -13,12 +13,13 @@
 # limitations under the License.
 
 """
-Linear Quantization Utility functions 
+Linear Quantization Utility functions
 
 Raises:
     ValueError: Lower clip value is less than 0 for symmetric quantization
 """
 
+# Standard
 from typing import Tuple
 
 # Third Party
@@ -288,7 +289,7 @@ def asymmetric_linear_quantization_params(
     diff = sat_max - sat_min
     # If float values are all 0, we just want the quantized values to be 0 as well.
     # So overriding the saturation value to 'n', so the scale becomes 1
-    diff[ diff == 0.0 ] = n_levels
+    diff[diff == 0.0] = n_levels
     scale = diff / n_levels
     zero_point = -sat_min / scale
     if integral_zero_point:
@@ -310,7 +311,7 @@ def symmetric_linear_quantization_params(
        num_bits (torch.IntTensor): Number of bits for quantization.
        sat_max (torch.FloatTensor): Upper clip value. Can be multi-valued (perCh/perGp).
        qlevel_lowering (bool, optional): Specify lowering of quantized levels. Defaults to False.
-        Ngp_or_ch (int, optional): 
+        Ngp_or_ch (int, optional):
 
     Returns:
         [torch.IntTensor, torch.FloatTensor, torch.FloatTensor]:
@@ -326,11 +327,12 @@ def symmetric_linear_quantization_params(
     # If float values are all 0, we just want the quantized values to be 0 as well.
     # So overriding the saturationvalue to '2n', so the scale becomes 1
     diff = 2 * sat_val
-    diff[ diff == 0.0 ] = n_levels
+    diff[diff == 0.0] = n_levels
     scale = diff / n_levels
     zero_point = torch.zeros_like(scale)
     return n_levels, scale, zero_point
-
+
+
 def per_channel_axis(
     scale: torch.FloatTensor,
     zero_point: torch.IntTensor,
@@ -349,7 +351,7 @@ def per_channel_axis(
         tensor_shape (torch.Size): Shape of quantized tensor
 
     Returns:
-        scale, zero_point: 
+        scale, zero_point:
     """
     if axis == 0:
         scale = scale.unsqueeze(1)
@@ -359,9 +361,9 @@ def per_channel_axis(
         zero_point = zero_point.unsqueeze(0)
     else:
         raise ValueError("Axis must be 0 or 1")
-
+
     # Check that tensor shape axis is same as scale/zp broadcast
     assert tensor_shape[axis] == scale.shape[axis]
     assert tensor_shape[axis] == zero_point.shape[axis]
-
+
     return scale, zero_point
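
Since these hunks only touch spacing, a worked sketch of the surrounding parameter math may help; it assumes n_levels = 2**num_bits - 1 (the n_levels computation sits outside this diff) and uses illustrative clip values:

import torch

num_bits = 8
n_levels = 2**num_bits - 1           # assumed; computed outside this diff
sat_min = torch.tensor([-0.5, 0.0])  # lower clip values; 2nd channel degenerate
sat_max = torch.tensor([1.5, 0.0])   # upper clip values

diff = sat_max - sat_min
# All-zero range: force diff to n_levels so the scale becomes 1 and the
# quantized values stay 0, exactly what diff[diff == 0.0] = n_levels does.
diff[diff == 0.0] = n_levels
scale = diff / n_levels              # [2/255, 1.0]
zero_point = -sat_min / scale        # [63.75, 0.0]

# per_channel_axis-style broadcast along axis 0: unsqueeze so scale and
# zero_point align with a [channels, features] tensor.
tensor_shape = torch.Size([2, 3])
scale, zero_point = scale.unsqueeze(1), zero_point.unsqueeze(1)
assert tensor_shape[0] == scale.shape[0] and tensor_shape[0] == zero_point.shape[0]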

fms_mo/quant_refactor/lsq_new.py

8 additions & 6 deletions

@@ -29,14 +29,14 @@
 import torch
 
 # Local
-from fms_mo.quant_refactor.base_quant import Quantizer, Qscheme
-from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE
+from fms_mo.quant_refactor.base_quant import Qscheme, Quantizer
 from fms_mo.quant_refactor.linear_utils import (
     asymmetric_linear_quantization_params,
     linear_dequantize,
     linear_quantize_LSQresidual,
     qint_bounds,
 )
+from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE
 
 clip_valn_default = torch.tensor(-8.0)
 clip_val_default = torch.tensor(8.0)
@@ -49,6 +49,7 @@
     qlevel_lowering=False,
 )
 
+
 class LSQQuantization_new(Quantizer):
     """
     LSQ Quantizer
@@ -64,7 +65,7 @@ def __init__(
         init_clip_val: torch.FloatTensor = clip_val_default,
         qscheme=qscheme_per_tensor,
         dequantize: bool = True,
-        **kwargs
+        **kwargs,
     ):
         """
         Init LSQ Quantizer
@@ -135,8 +136,9 @@ def forward(
             torch.Tensor: Dequantized or Quantized output tensor.
         """
 
-        clip_valn, clip_val = clip_valn.to(input_tensor.dtype), clip_val.to(
-            input_tensor.dtype
+        clip_valn, clip_val = (
+            clip_valn.to(input_tensor.dtype),
+            clip_val.to(input_tensor.dtype),
         )
 
         n_levels, scale, zero_point = asymmetric_linear_quantization_params(
@@ -205,7 +207,7 @@ def __init__(
         init_clip_val: torch.FloatTensor = clip_val_default,
         qscheme=qscheme_per_tensor,
         dequantize: bool = True,
-        **kwargs
+        **kwargs,
     ):
         """
         Init LSQ+ Quantizer
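
The one functional-looking hunk here is still pure formatting: the tuple assignment casting both clip bounds to the activation dtype is only re-wrapped. A tiny standalone sketch with illustrative tensors:

import torch

input_tensor = torch.randn(4, dtype=torch.float16)
clip_valn, clip_val = torch.tensor(-8.0), torch.tensor(8.0)

# Cast both learnable clip bounds to the activation dtype in one tuple
# assignment, matching the reformatted block above.
clip_valn, clip_val = (
    clip_valn.to(input_tensor.dtype),
    clip_val.to(input_tensor.dtype),
)
print(clip_val.dtype)  # torch.float16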

fms_mo/quant_refactor/pact2_new.py

4 additions & 6 deletions

@@ -20,11 +20,8 @@
 import torch
 
 # Local
-from fms_mo.quant_refactor.base_quant import Quantizer, Qscheme
-from fms_mo.quant_refactor.per_tensor_ste import (
-    PerTensorSTE,
-    PerTensorSTE_PTnative,
-)
+from fms_mo.quant_refactor.base_quant import Qscheme, Quantizer
+from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE, PerTensorSTE_PTnative
 
 clip_valn_default = torch.tensor(-8.0)
 clip_val_default = torch.tensor(8.0)
@@ -37,6 +34,7 @@
     qlevel_lowering=False,
 )
 
+
 class PACT2_new(Quantizer):
     """
     Two-sided original PACT
@@ -52,7 +50,7 @@ def __init__(
         qscheme: Qscheme = qscheme_per_tensor,
         dequantize: bool = True,
         pact_plus: bool = True,
-        **kwargs
+        **kwargs,
     ):
         """
         Init PACT2 quantizer

fms_mo/quant_refactor/pact2sym_new.py

4 additions & 6 deletions

@@ -20,11 +20,8 @@
 import torch
 
 # Local
-from fms_mo.quant_refactor.base_quant import Quantizer, Qscheme
-from fms_mo.quant_refactor.per_tensor_ste import (
-    PerTensorSTE,
-    PerTensorSTE_PTnative,
-)
+from fms_mo.quant_refactor.base_quant import Qscheme, Quantizer
+from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE, PerTensorSTE_PTnative
 
 clip_valn_default = torch.tensor(-8.0)
 clip_val_default = torch.tensor(8.0)
@@ -37,6 +34,7 @@
     qlevel_lowering=False,
 )
 
+
 class PACT2Sym_new(Quantizer):
     """
     Two-sided PACT with symmetric clip values
@@ -52,7 +50,7 @@ def __init__(
         init_clip_val: torch.FloatTensor = clip_val_default,
         qscheme: Qscheme = qscheme_per_tensor,
         dequantize: bool = True,
-        **kwargs
+        **kwargs,
     ):
         """
         Init PACT2Sym quantizer

fms_mo/quant_refactor/pact_new.py

4 additions & 6 deletions

@@ -20,11 +20,8 @@
 import torch
 
 # Local
-from fms_mo.quant_refactor.base_quant import Quantizer, Qscheme
-from fms_mo.quant_refactor.per_tensor_ste import (
-    PerTensorSTE,
-    PerTensorSTE_PTnative,
-)
+from fms_mo.quant_refactor.base_quant import Qscheme, Quantizer
+from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE, PerTensorSTE_PTnative
 
 clip_valn_default = torch.tensor(0.0)
 clip_val_default = torch.tensor(8.0)
@@ -37,6 +34,7 @@
     qlevel_lowering=False,
 )
 
+
 class PACT_new(Quantizer):
     """
     1-sided original PACT
@@ -54,7 +52,7 @@ def __init__(
         qscheme: Qscheme = qscheme_per_tensor,
         dequantize: bool = True,
         pact_plus: bool = True,
-        **kwargs
+        **kwargs,
     ):
         """
         Initialize PACT quantizer

fms_mo/quant_refactor/pactplussym_new.py

4 additions & 6 deletions

@@ -20,11 +20,8 @@
 import torch
 
 # Local
-from fms_mo.quant_refactor.base_quant import Quantizer, Qscheme
-from fms_mo.quant_refactor.per_tensor_ste import (
-    PerTensorSTE,
-    PerTensorSTE_PTnative,
-)
+from fms_mo.quant_refactor.base_quant import Qscheme, Quantizer
+from fms_mo.quant_refactor.per_tensor_ste import PerTensorSTE, PerTensorSTE_PTnative
 
 clip_valn_default = torch.tensor(-8.0)
 clip_val_default = torch.tensor(8.0)
@@ -37,6 +34,7 @@
     qlevel_lowering=False,
 )
 
+
 class PACTplusSym_new(Quantizer):
     """
     Two-sided symmetric PACT+
@@ -54,7 +52,7 @@ def __init__(
         qscheme: Qscheme = qscheme_per_tensor,
         dequantize: bool = True,
         extend_act_range: bool = False,
-        **kwargs
+        **kwargs,
     ):
         """
         Init PACT+Sym quantizer
