
Commit a852ca8

feat: Added SAWB perCh PTnative and removed 16 bins
Signed-off-by: Brandon Groth <[email protected]>
1 parent 87f6907 commit a852ca8

2 files changed (+164, -86 lines)


fms_mo/quant_refactor/per_channel_ste.py

Lines changed: 159 additions & 8 deletions
@@ -28,6 +28,7 @@
     per_channel_axis,
 )
 
+default_axis = int(0)
 
 class PerChannelSTE(torch.autograd.Function):
     """Base class for customized forward/backward functions that is NOT using PT native func.
@@ -51,7 +52,7 @@ def forward(
         dequantize: bool = True,
         symmetric: bool = False,
         qlevel_lowering: bool = False,
-        axis: int = 0,
+        axis: int = default_axis,
     ):
         """
         General forward method:
@@ -104,7 +105,7 @@ def calc_qparams(
         clip_val: torch.FloatTensor,
         symmetric: bool = False,
         qlevel_lowering: bool = True,
-        axis: int = 0,
+        axis: int = default_axis,
         tensor_shape: torch.Size = None,
     ):
         """
@@ -195,7 +196,7 @@ def forward(
         dequantize: bool = True,
         symmetric: bool = False,
         qlevel_lowering: bool = False,
-        axis: int = 0,
+        axis: int = default_axis,
     ):
         """
         General forward method:
@@ -293,7 +294,7 @@ def qint_bounds(
         num_bits_int = (
             num_bits.item() if isinstance(num_bits, torch.Tensor) else num_bits
         )
-        if symmetric and zero_point == 0:
+        if symmetric and torch.sum(zero_point) == 0:
             qlevel_symmetric = 1 if qlevel_lowering else 0
             qint_l, qint_h = (
                 -(2 ** (num_bits_int - 1)) + qlevel_symmetric,
@@ -315,7 +316,7 @@ def linear_quantization(
         qint_h: int,
         qint_dtype: torch.dtype,
         dequantize: bool = True,
-        axis: int = 0,
+        axis: int = default_axis,
     ) -> torch.Tensor:
         """
         Linear quantization for PTnative STE
@@ -392,7 +393,7 @@ def forward(
         symmetric: bool = False,
         qlevel_lowering: bool = False,
         use_code: bool = False,
-        axis: int = 0,
+        axis: int = default_axis,
     ):
         """
         General forward method:
@@ -450,7 +451,7 @@ def calc_qparams(
         clip_val: torch.FloatTensor,
         symmetric: bool = False,
         qlevel_lowering: bool = True,
-        axis: int = 0,
+        axis: int = default_axis,
         tensor_shape: torch.Size = None,
         use_code: bool = False,
     ):
@@ -486,4 +487,154 @@ def calc_qparams(
             output = n_levels, scale, zero_point
         else:
             raise ValueError("SAWB has non-symmetric Qscheme")
-        return output
+        return output
+
+class PerChannelSTESAWB_PTnative(PerChannelSTE_PTnative):
+    """Base class for customized forward/backward functions.
+    There's a family of non-learnable quantizers, such as SAWB, MinMax,
+    whose forward can leverage PT native functions and backward is simply STE.
+    We just need to calculate scale in the upper-level quantizer class, then those quantizers
+    can all use the same base "STE function".
+
+    Math should be consistent with PyTorch: https://pytorch.org/docs/stable/quantization.html
+        x_int = round(x/scale + zp)
+        x_dq = (x_int - zp) * scale
+
+    This type of class will be used by Quantizer.forward().
+    """
+
+    @staticmethod
+    def forward(
+        ctx,
+        input_tensor: torch.FloatTensor,
+        num_bits: torch.IntTensor,
+        clip_valn: torch.FloatTensor,
+        clip_val: torch.FloatTensor,
+        dequantize: bool = True,
+        symmetric: bool = False,
+        qlevel_lowering: bool = False,
+        use_code: bool = False,
+        axis: int = default_axis,
+    ):
+        """
+        General forward method:
+            Cast clip values to the dtype of the input tensor
+            Compute # of quantized levels, scale, and zero point
+            Perform PTnative linear quantization on the input tensor
+            Return output
+
+        Args:
+            ctx (torch.autograd.Function): Forward/Backward context object.
+            input_tensor (torch.FloatTensor): Tensor to be quantized.
+            num_bits (torch.IntTensor): Number of bits for quantization.
+            clip_valn (torch.FloatTensor): Lower clip value bound.
+            clip_val (torch.FloatTensor): Upper clip value bound.
+            dequantize (bool, optional): Return dequantized or int tensor. Defaults to True.
+            symmetric (bool, optional): Specify if clip values are symmetric. Defaults to False.
+            qlevel_lowering (bool, optional): Specify lowering of quantized levels.
+                Defaults to False.
+            use_code (bool, optional): Specify using SAWB code. Defaults to False.
+            axis (int, optional): Specify which tensor dimension to quantize individually.
+                Defaults to 0.
+
+        Returns:
+            torch.Tensor: Dequantized or Quantized output tensor.
+        """
+        clip_valn, clip_val = transform_clips(
+            input_tensor.dtype,
+            clip_valn,
+            clip_val,
+        )
+        (
+            _,
+            scale,
+            zero_point,
+            qint_l,
+            qint_h,
+            qint_dtype,
+        ) = PerChannelSTESAWB_PTnative.calc_qparams(
+            num_bits, clip_valn, clip_val, symmetric, qlevel_lowering,
+        )
+        output = PerChannelSTE_PTnative.linear_quantization(
+            input_tensor, scale, zero_point, qint_l, qint_h, qint_dtype, dequantize, axis
+        )
+        return output
+
+    @classmethod
+    def calc_qparams(
+        cls,
+        num_bits: torch.IntTensor,
+        clip_valn: torch.FloatTensor,
+        clip_val: torch.FloatTensor,
+        symmetric: bool = False,
+        qlevel_lowering: bool = False,
+    ) -> Tuple[torch.IntTensor, torch.FloatTensor, torch.IntTensor, int, int]:
+        """
+        Compute the scale and zero_point from num_bits and clip values.
+        Also, compute qint bounds for PT clamping.
+
+        Args:
+            num_bits (torch.IntTensor): Number of bits for quantization.
+            clip_valn (torch.FloatTensor): Lower clip value.
+            clip_val (torch.FloatTensor): Upper clip value.
+            symmetric (bool, optional): Specify if clip values are symmetric. Defaults to False.
+            qlevel_lowering (bool, optional): Specify lowering of quantized levels.
+                Defaults to False.
+
+        Returns:
+            Tuple[torch.IntTensor, torch.FloatTensor, torch.IntTensor]: Quantized parameters
+        """
+        n_levels = 2**num_bits - 2 if qlevel_lowering else 2**num_bits - 1
+        scale = (clip_val - clip_valn) / n_levels
+        zero_point = (
+            torch.zeros_like(scale)
+            if symmetric
+            else torch.round(-clip_valn / scale).to(torch.int)
+        )
+        qint_l, qint_h, qint_dtype = PerChannelSTESAWB_PTnative.qint_bounds(
+            num_bits, zero_point, symmetric, qlevel_lowering
+        )
+        # Note: fake_quantize_per_channel_affine does not need matching dimensions for scale/zp to tensor
+        return n_levels, scale, zero_point, qint_l, qint_h, qint_dtype
+
+    @classmethod
+    def qint_bounds(
+        cls,
+        num_bits: torch.IntTensor,
+        zero_point: torch.IntTensor,
+        symmetric: bool = False,
+        qlevel_lowering: bool = True,
+    ) -> Tuple[int, int, torch.dtype]:
+        """
+        qlevel_symmetric: shift qlevel from [-2**(b-1), 2**(b-1)-1] to [-2**(b-1)+1, 2**(b-1)-1]
+            For int8: [-127,127] ; for int4: [-7,7]
+        qint bounds must be ints, not tensors
+        """
+        num_bits_int = (
+            num_bits.item() if isinstance(num_bits, torch.Tensor) else num_bits
+        )
+        if symmetric and torch.sum(zero_point) == 0:
+            qlevel_symmetric = 1 if qlevel_lowering else 0
+            qint_l, qint_h = (
+                -(2 ** (num_bits_int - 1)) + qlevel_symmetric,
+                2 ** (num_bits_int - 1) - 1,
+            )
+            qint_dtype = torch.qint8
+        else:  # single_sided or zero_point != 0
+            qint_l, qint_h = 0, 2**num_bits_int - 1
+            qint_dtype = torch.quint8
+        return qint_l, qint_h, qint_dtype
+
+    @staticmethod
+    def backward(ctx, grad_output):
+        """
+        General STE backward method:
+            Return grad_output + None args to match forward inputs
+
+        Args:
+            ctx (torch.autograd.Function): Forward/Backward context object.
+            grad_output (torch.FloatTensor): Gradient tensor
+
+        Returns:
+            torch.FloatTensor, None,...,None: STE Gradient
+        """
+        return grad_output, None, None, None, None, None, None
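To make the quantization math quoted in the new PerChannelSTESAWB_PTnative docstring concrete, here is a small illustrative sketch (not part of the commit; the tensor, the names weights/n_bits, and the symmetric, qlevel_lowering settings are assumptions chosen for the example). It applies the same scale/zero_point formulas as calc_qparams and checks torch.fake_quantize_per_channel_affine against x_int = round(x/scale + zp), x_dq = (x_int - zp) * scale:

import torch

torch.manual_seed(0)
weights = torch.randn(4, 16)                 # 4 output channels, per-channel axis = 0
n_bits = 8
clip_val = weights.abs().amax(dim=1)         # per-channel symmetric clip, shape (4,)
clip_valn = -clip_val

# Same formulas as calc_qparams with symmetric=True and qlevel_lowering=True
n_levels = 2**n_bits - 2                     # 254 levels -> integer bounds [-127, 127]
scale = (clip_val - clip_valn) / n_levels
zero_point = torch.zeros_like(scale, dtype=torch.int32)
qint_l, qint_h = -(2 ** (n_bits - 1)) + 1, 2 ** (n_bits - 1) - 1

# PT-native path, as used by linear_quantization(..., dequantize=True)
dq_native = torch.fake_quantize_per_channel_affine(
    weights, scale, zero_point, axis=0, quant_min=qint_l, quant_max=qint_h
)

# Manual path from the docstring math: x_int = round(x/scale + zp), x_dq = (x_int - zp)*scale
x_int = torch.clamp(torch.round(weights / scale[:, None]), qint_l, qint_h)
dq_manual = x_int * scale[:, None]
print(torch.allclose(dq_native, dq_manual))  # expected: True

# Why the diff replaces `zero_point == 0` with `torch.sum(zero_point) == 0`:
# with a per-channel zero_point tensor, `if zero_point == 0:` would raise
# "Boolean value of Tensor with more than one element is ambiguous".
print(torch.sum(zero_point) == 0)            # tensor(True)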

fms_mo/quant_refactor/sawb_new.py

Lines changed: 5 additions & 78 deletions
@@ -29,7 +29,7 @@
 )
 from fms_mo.quant_refactor.per_channel_ste import (
     PerChannelSTESAWB,
-    # PerChannelSTESAWB_PTnative,
+    PerChannelSTESAWB_PTnative,
 )
 from fms_mo.quant_refactor.linear_utils import linear_dequantize, linear_quantize
 from fms_mo.quant_refactor.sawb_utils import sawb_params, sawb_params_code
@@ -132,14 +132,10 @@ def set_quantizer(self):
        if self.use_PT_native_Qfunc:
            if self.perCh:
                self.use_code = self.qscheme.qlevel_lowering
-               # self.quantizer = PerChannelSTESAWB_PTnative
+               self.quantizer = PerChannelSTESAWB_PTnative
            else:
-               # if self.use_extended_range_4bits:
-               #     self.use_code = True
-               #     self.quantizer = SAWBPlus16ZeroSTE_PTnative
-               # else:
-                   self.use_code = self.qscheme.qlevel_lowering
-                   self.quantizer = PerTensorSTESAWB_PTnative
+               self.use_code = self.qscheme.qlevel_lowering
+               self.quantizer = PerTensorSTESAWB_PTnative
 
        else: # Non-PTnative quantizers
            self.use_code = self.qscheme.qlevel_lowering
@@ -148,8 +144,6 @@ def set_quantizer(self):
            self.quantizer = (
                SAWBPlusZeroPerChSTE_new
                if self.perCh and self.num_bits in [2, 4, 8]
-               # else SAWBPlus16ZeroSTE_new
-               # if self.extended_ranged and self.num_bits == 4
                else SAWBPlusZeroSTE_new
            )
        else:
@@ -558,79 +552,11 @@ def backward(ctx, grad_output):
     # return n_levels, clip_val, scale, zero_point, qint_l, qint_h, qint_dtype
 
 
-# Placeholder classes for PerCh - need to rework #
 class SAWBPlusZeroPerChSTE_new(PerChannelSTESAWB):
     """
     per-channel SAWB with zero alignment, can use 15 or 16 bins, i.e. [-7,7] or [-8,7]
     """
 
-    # @staticmethod
-    # def forward(
-    #     ctx,
-    #     input_tensor: torch.FloatTensor,
-    #     num_bits: torch.IntTensor,
-    #     _clip_valn: torch.FloatTensor = clip_valn_default,
-    #     clip_val: torch.FloatTensor = clip_val_default,
-    #     dequantize: bool = True,
-    #     _symmetric: bool = False,
-    #     _qlevel_lowering: bool = False,
-    #     _use_code: bool = False,
-    # ):
-    #     """
-    #     Forward function for SAWBPlusZeroPerChSTE
-
-    #     Args:
-    #         ctx (torch.autograd.Function): Forward/Backward context object.
-    #         input_tensor (torch.FloatTensor): Tensor to be quantized.
-    #         num_bits (torch.IntTensor): Number of bit for quantization.
-    #         clip_valn (torch.FloatTensor): Lower clip value bound.
-    #         clip_val (torch.FloatTensor): Upper clip value bound.
-    #         dequantize (bool, optional): Return dequantized or int tensor. Defaults to True.
-    #         symmetric (bool, optional): Specify if clip values are symmetric. Defaults to False.
-    #         qlevel_lowering (bool, optional): Specify lowering of quantized levels.
-    #             Defaults to True.
-    #         use_code (bool, optional): Specify using SAWB code. Defaults to False.
-
-    #     Returns:
-    #         torch.Tensor: Dequantized or Quantized output tensor.
-    #     """
-    #     # assert num_bits in [4, 8], "only implemented for 4bit and 8bit"
-
-    #     SAWBcode_mapping = {8: 803, 4: 403, 2: 103}
-    #     num_bits_int = (
-    #         num_bits.item() if isinstance(num_bits, torch.Tensor) else num_bits
-    #     )
-    #     clip_val, _ = sawb_params_code(
-    #         num_bits_int, SAWBcode_mapping[num_bits_int], input_tensor, perCh=True
-    #     )
-
-    #     _nspace = 2**num_bits - 2 # + objSAWB.use16bins # Ignore 16bins for now
-    #     int_l = -(2 ** (num_bits - 1)) + 1
-    #     int_u = -int_l # + objSAWB.use16bins # Ignore 16bins for now
-
-    #     scale = clip_val * 2 / (2**num_bits - 2)
-    #     # original SAWB assumes odd number of bins when calc clip_val
-    #     zero_point = torch.zeros_like(scale) # SAWB always centers around 0 and align 0
-
-    #     if dequantize:
-    #         output = torch.fake_quantize_per_channel_affine(
-    #             input_tensor.float(),
-    #             scale.float(),
-    #             zero_point.float(),
-    #             axis=0,
-    #             quant_min=int_l,
-    #             quant_max=int_u,
-    #         ).to(
-    #             clip_val.dtype
-    #         ) # NOTE return will be a fp32 tensor; function only support float()
-    #     else:
-    #         output = torch.quantize_per_channel(
-    #             input_tensor, scale, zero_point, 0, torch.qint8
-    #         ).int_repr()
-    #         # NOTE return will be a torch.int8 tensor
-
-    #     return output
-
     @staticmethod
     def backward(ctx, grad_output):
         """
@@ -645,3 +571,4 @@ def backward(ctx, grad_output):
         """
         grad_input = grad_output.clone()
         return grad_input, None, None, None, None, None, None, None
+
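As a companion to the commit title's "removed 16 bins", here is a minimal sketch (not from the repository) that mirrors the qint_bounds logic from per_channel_ste.py with plain Python ints. The 15-bin [-7, 7] range is the SAWB per-channel path this commit wires up, while the 16-bin [-8, 7] range corresponds to the extended-range variants whose commented-out remnants are deleted in sawb_new.py:

# Illustrative helper mirroring PerChannelSTESAWB_PTnative.qint_bounds for plain ints
def qint_bounds_sketch(num_bits: int, symmetric: bool, qlevel_lowering: bool):
    if symmetric:                                   # zero_point assumed all-zero
        low = -(2 ** (num_bits - 1)) + (1 if qlevel_lowering else 0)
        high = 2 ** (num_bits - 1) - 1
    else:                                           # single-sided / non-zero zero_point
        low, high = 0, 2**num_bits - 1
    return low, high

print(qint_bounds_sketch(8, True, True))    # (-127, 127)
print(qint_bounds_sketch(4, True, True))    # (-7, 7)   -> 15-bin SAWB range
print(qint_bounds_sketch(4, True, False))   # (-8, 7)   -> 16 bins
print(qint_bounds_sketch(4, False, True))   # (0, 15)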
