Skip to content

Commit a418e29

Browse files
Update quantizers.py
Signed-off-by: chichun-charlie-liu <[email protected]>
1 parent 5495560 commit a418e29

File tree

1 file changed

+3
-3
lines changed

1 file changed

+3
-3
lines changed

fms_mo/quant/quantizers.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3945,13 +3945,13 @@ def init_quantization_scale(self, x: torch.Tensor, channel_wise: bool = False):
39453945
delta = float(x_max - x_min) / (self.n_levels - 1)
39463946
if delta < 1e-8:
39473947
logger.info(f"Quantization range close to zero: [{x_min}, {x_max}]")
3948-
delta = 1e-8 # type: ignore[assignment]
3948+
delta = 1e-8
39493949

39503950
zero_point = round(-x_min / delta)
39513951

39523952
elif self.scale_method == "mse":
39533953
x_max = x.max()
3954-
x_min = x.min() # type: ignore[assignment]
3954+
x_min = x.min()
39553955
best_score = 1e10
39563956
for i in range(80):
39573957
new_max = x_max * (1.0 - (i * 0.01))
@@ -5454,7 +5454,7 @@ def custom_fp8_quantizer(
54545454
mantissa_bits: int = 3,
54555455
use_subnormal: bool = False,
54565456
scale_to_max: bool = False,
5457-
) -> torch.Tensor:
5457+
):
54585458
"""Convert tensor to FP8 format, remaining in decimal form (no binary conversion)
54595459
and using some clever manipulation to round each tensor value to the closest representable
54605460
FP8 value.

0 commit comments

Comments
 (0)