
Commit 0173209

fix: QminmaxPerCh_PTnative patch for unstable rounding in torch.quantize_per_channel

Signed-off-by: Brandon Groth <[email protected]>
1 parent fbf94e0

File tree: 4 files changed (+16 −9 lines)

fms_mo/quant_refactor/linear_utils.py

Lines changed: 1 addition & 5 deletions

@@ -293,14 +293,10 @@ def asymmetric_linear_quantization_params(
     scale = diff / n_levels
     zero_point = -sat_min / scale
     if integral_zero_point:
-        zero_point = zero_point.round()
+        zero_point = zero_point.round().to(torch.int)
     if signed:
         zero_point += 2 ** (num_bits - 1)

-    # Ensure zp in [0, n_levels]
-    zp_bounds = torch.all((zero_point > 0) & (zero_point < n_levels))
-    assert zp_bounds, "Asymmetric zero points should be in [0, 2**bits-1]"
-
     return n_levels, scale, zero_point
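For context, this function computes affine quantization parameters from clip bounds; the patch casts the rounded zero point to an integer dtype and drops the range assertion. A minimal standalone sketch of the same arithmetic (the tensor values and the sat_min/sat_max bounds here are illustrative, not taken from the repo's call sites):

import torch

# Illustrative per-channel clip bounds; sat_min/sat_max stand in for the
# values the real function receives.
sat_min = torch.tensor([-1.0, -0.5])
sat_max = torch.tensor([1.0, 2.0])
num_bits, signed = 8, False

n_levels = 2**num_bits - 1
scale = (sat_max - sat_min) / n_levels
# The patched line: round, then cast to int (the old range assert is gone)
zero_point = (-sat_min / scale).round().to(torch.int)
if signed:
    zero_point += 2 ** (num_bits - 1)
print(scale)       # tensor([0.0078, 0.0098])
print(zero_point)  # tensor([128,  51], dtype=torch.int32)

The integer cast matters downstream: torch.quantize_per_channel requires an integer zero-point tensor, and a float zero point exactly on a .5 boundary (127.5 above) is where the rounding instability this commit addresses can surface.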

fms_mo/quant_refactor/per_channel_ste.py

Lines changed: 0 additions & 1 deletion

@@ -361,7 +361,6 @@ def linear_quantization(
            quant_max=qint_h,
        ).to(input_tensor.dtype)
    else:
-        # Note: scale is multi-valued, but zero_point isn't...
        output = (
            torch.quantize_per_channel(
                input_tensor.float(),
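For reference, a minimal standalone call to torch.quantize_per_channel (the values are made up): scales is a float tensor with one entry per channel along axis, and zero_points must be an integer tensor, which is the same requirement the linear_utils.py change satisfies with .to(torch.int).

import torch

x = torch.tensor([[0.10, 0.63], [-0.42, 1.20]])
scales = torch.tensor([0.01, 0.02])   # "multi-valued": one scale per channel
zero_points = torch.tensor([0, 64])   # integer tensor, one entry per channel
qx = torch.quantize_per_channel(x, scales, zero_points, axis=0, dtype=torch.quint8)
print(qx.int_repr())    # the raw uint8 codes
print(qx.dequantize())  # back to float: (code - zp) * scale per channel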

tests/quantizers/test_qmax.py

Lines changed: 5 additions & 0 deletions

@@ -713,11 +713,16 @@ def test_qmaxnew_asymmetric_perCh(

     setup = torch_quantizer_asymmetric_perCh.get_setup()

+    # QminmaxPerChSTE_PTnative has a rare numerical problem: when
+    # input/scale + zp == (K + 0.5), rounding becomes unstable inside torch.quantize_per_channel
+    error_override = base_options["nativePT"] and other_options_perCh["minmax"]
+
     quantizer_error(
         tensor,
         qtensor_fms_mo,
         qtensor_torch,
         setup,
         base_options,
         other_options_perCh,
+        error_override=error_override,
     )
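The tie-breaking behavior the new comment refers to is round-half-to-even ("banker's rounding"), which torch.round uses. A tiny illustration with values chosen so the scaled input lands exactly on a .5 boundary (this does not reproduce the exact failure path inside torch.quantize_per_channel, only the boundary behavior):

import torch

# 0.75, 1.25, 1.75 and 0.5 are all exact binary floats, so t / scale lands
# exactly on .5 boundaries with no representation fuzz.
t = torch.tensor([0.75, 1.25, 1.75])
scale = 0.5
print(t / scale)               # tensor([1.5000, 2.5000, 3.5000])
print(torch.round(t / scale))  # tensor([2., 2., 4.]) -- ties go to the even integer

Two code paths that reach the same tie through slightly different floating-point operations can land on either side of the boundary, so the FMS path and the native-PyTorch path may disagree by one quantization level. Rather than asserting exact equality, the test loosens the zero tolerances via error_override for this quantizer combination.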

tests/quantizers/test_quantizer_utils.py

Lines changed: 10 additions & 3 deletions

@@ -79,6 +79,7 @@ def quantizer_error(
     max_norm_tol=1e-5,
     l2_norm_tol=1e-2,
     nonzero_tol=1e-2,
+    error_override=False,
 ):
     """
     Check various types of quantizer numerical errors for FMS and Torch quantized tensors
@@ -102,7 +103,7 @@ def quantizer_error(
     """

     # If using PyTorch functions, set error tolerances to zero
-    if base_options["nativePT"]:
+    if base_options["nativePT"] and not error_override:
         max_norm_tol = 0.0
         l2_norm_tol = 0.0
         nonzero_tol = 0.0
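The effect of the flag on tolerance selection, as a self-contained sketch of just this branch (defaults copied from the signature in the first hunk; the function name is hypothetical):

def pick_tolerances(native_pt: bool, error_override: bool = False):
    max_norm_tol, l2_norm_tol, nonzero_tol = 1e-5, 1e-2, 1e-2
    # Patched condition: demand exact equality only when no override is set
    if native_pt and not error_override:
        max_norm_tol = l2_norm_tol = nonzero_tol = 0.0
    return max_norm_tol, l2_norm_tol, nonzero_tol

print(pick_tolerances(True))        # (0.0, 0.0, 0.0) -- exact match required
print(pick_tolerances(True, True))  # (1e-05, 0.01, 0.01) -- override keeps slack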
@@ -207,16 +208,16 @@ def quantizer_error(
        num_bits,
        clip_low,
        clip_high,
+        _n_level,
        scale,
        _zero_point,
-        _n_level,
        _quant_min,
        _quant_max,
        _qscheme,
    ) = setup

    # Check if qtensors are constant for non-constant tensor with appropriate spacing of elements
-    if tensor.unique().numel() > 1 and (tensor.max() - tensor.min()) > scale:
+    if tensor.unique().numel() > 1 and (tensor.max() - tensor.min()) > scale.min():
        fms_mo_unique_vals = qtensor_fms_mo.unique()
        torch_unique_vals = qtensor_torch.unique()
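The scale.min() change matters because setup comes from a per-channel quantizer here, so scale is a tensor rather than a scalar. An elementwise tensor comparison cannot be used directly as an if condition (PyTorch raises "Boolean value of Tensor with more than one element is ambiguous"), while reducing with .min() yields a single truth value and keeps the guard meaningful: the tensor must span more than the finest quantization step. With illustrative values:

import torch

scale = torch.tensor([0.10, 0.20, 0.40])  # per-channel scales (made-up)
spread = torch.tensor(1.0)                # stands in for tensor.max() - tensor.min()

print(spread > scale)        # tensor([True, True, True]) -- elementwise, ambiguous in `if`
print(spread > scale.min())  # tensor(True) -- single value, safe to branch on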

@@ -271,6 +272,8 @@ def quantizer_error(

     assert total_nonscale_nonzero_indices == total_nonzero_indices - total_scale_indices

+    # At this point, we don't want to count any potential problems from banker's rounding
+
     with torch.no_grad():
         try:
             # Check for large difference in values for current dtype (ie underflow/overflow)
@@ -322,6 +325,10 @@ def quantizer_error(
         )
         logger.error("Total Diff vals =%s", diff.unique().numel())
         logger.error("Diff unique vals =\n%s", diff.unique().detach())
+
+        logger.error("input tensor =\n%s", tensor)
+        logger.error("torch_scale =\n%s", scale)
+        logger.error("torch_zero_point =\n%s", _zero_point)
         raise e_value  # Reraise exception
