
Commit 9d7f152

Merge pull request #26 from andrea-fasoli/clip_symmetry
Fix symmetric behavior (issue #22)
2 parents 6b702d1 + 18f23fb commit 9d7f152

3 files changed: +38 −7 lines changed


fms_mo/calib.py

Lines changed: 20 additions & 2 deletions

@@ -72,17 +72,35 @@ def __call__(self, module, inputs: torch.Tensor):
         with torch.no_grad():
             x = inputs[0].detach()  # TODO: still need detach() under no_grad context?

+            symmetric = False
+            if module.quantize_feature:
+                # default to asymmetric clip_val computation
+                # TODO: this misses symmetry of PACTPlusSym, PACT2Sym, and QFixSymmetric
+                symmetric = not getattr(module.quantize_feature, "minmax", True)
+
             nelem = x.nelement()
             if self.a_init_method == "percentile":
                 lower_k = int(self.per[0] * nelem)
-                lower_per_cur = (
+                lower_per_cur_candidate = (
                     x.reshape(1, -1).kthvalue(lower_k).values.data[0]
                     if lower_k > 0
                     else x.min()
                 )  # guard rail: tensors with very few elements could cause kthvalue(0) error
-                upper_per_cur = (
+                upper_per_cur_candidate = (
                     x.reshape(1, -1).kthvalue(int(self.per[1] * nelem)).values.data[0]
                 )
+                if symmetric:
+                    upper_per_cur = max(
+                        upper_per_cur_candidate,
+                        lower_per_cur_candidate.abs(),
+                    )
+                    lower_per_cur = -upper_per_cur
+                else:
+                    upper_per_cur = upper_per_cur_candidate
+                    lower_per_cur = lower_per_cur_candidate
+            elif symmetric:
+                upper_per_cur = x.abs().max()
+                lower_per_cur = -upper_per_cur
             else:
                 lower_per_cur = x.min()
                 upper_per_cur = x.max()
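
For context, the percentile path above can be read as a standalone sketch. This is illustrative only, assuming a fractional per pair such as (0.001, 0.999) as in calib.py; the helper name percentile_clip is hypothetical, not an fms_mo API.

import torch

def percentile_clip(x: torch.Tensor, per=(0.001, 0.999), symmetric: bool = False):
    # hypothetical helper mirroring the clip_val logic patched above
    nelem = x.nelement()
    lower_k = int(per[0] * nelem)
    # guard rail: kthvalue(0) would error on very small tensors
    lower = x.reshape(1, -1).kthvalue(lower_k).values[0] if lower_k > 0 else x.min()
    upper = x.reshape(1, -1).kthvalue(int(per[1] * nelem)).values[0]
    if symmetric:
        # a symmetric quantizer has a single clip magnitude: take the larger
        # candidate and mirror it around zero
        upper = torch.maximum(upper, lower.abs())
        lower = -upper
    return lower, upper

x = torch.randn(10_000) - 2.0              # skewed toward negative values
print(percentile_clip(x))                  # asymmetric, e.g. roughly (-5.1, 1.1)
print(percentile_clip(x, symmetric=True))  # mirrored, e.g. roughly (-5.1, 5.1)

Before this fix, a symmetric quantizer initialized from these clip values would silently receive an asymmetric range whenever the data distribution was skewed.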

fms_mo/modules/linear.py

Lines changed: 6 additions & 1 deletion

@@ -162,12 +162,17 @@ def __init__(
                 use_subnormal=self.fp8_use_subnormal,
             )
         if self.calib_counter > 0:
+            qa_mode_calib = (
+                self.qa_mode_calib + "sym"
+                if self.qa_mode.endswith("sym")
+                else self.qa_mode_calib
+            )
             self.quantize_calib_feature = Qdynamic(
                 self.num_bits_feature,
                 qcfg,
                 non_neg=self.non_neg,
                 align_zero=self.align_zero,
-                qmode=self.qa_mode_calib,
+                qmode=qa_mode_calib,
                 quantizer2sync=self.quantize_feature,
             )
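
The effect of the change is easiest to see on the mode strings themselves; a minimal illustration, with example mode names ("pactsym", "percentile") that are assumptions rather than an exhaustive list from fms_mo:

# if the runtime activation quantizer is symmetric (qa_mode ends in "sym"),
# the dynamic calibration quantizer now gets a matching "...sym" mode
qa_mode, qa_mode_calib = "pactsym", "percentile"  # example values only
qa_mode_calib = qa_mode_calib + "sym" if qa_mode.endswith("sym") else qa_mode_calib
print(qa_mode_calib)  # -> "percentilesym"

Previously Qdynamic always received self.qa_mode_calib unchanged, so a symmetric runtime quantizer could be calibrated with asymmetric clip values.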

fms_mo/quant/quantizers.py

Lines changed: 12 additions & 4 deletions

@@ -3512,7 +3512,7 @@ def __init__(
         """
         super().__init__()
         self.num_bits = num_bits
-        self.symmetric = symmetric or qmode.endswith("_sym")
+        self.symmetric = symmetric or qmode.endswith("sym")
         self.nlevels = (
             2**self.num_bits - 2 if self.symmetric else 2**self.num_bits - 1
         )

@@ -3553,24 +3553,32 @@ def forward(self, input_tensor):
         with torch.no_grad():
             if self.qmode.startswith("percentile"):
                 nelem = input_tensor.nelement()
-                cv_new = (
+                cv_new_candidate = (
                     input_tensor.reshape(1, -1)
                     .float()
                     .kthvalue(
                         round(self.per[1] * 0.01 * nelem)
                     )  # built-in 'round' returns int
                     .values.data[0]
                 ).to(input_tensor.dtype)
+
                 # conventionally percentile is input as 99.9 (% is implied),
                 # so we need *0.01 here
                 lower_k = round(self.per[0] * 0.01 * nelem)
-                cvn_new = (
+                cvn_new_candidate = (
                     input_tensor.reshape(1, -1).float().kthvalue(lower_k).values.data[0]
                     if lower_k > 0
                     else input_tensor.min()
                 ).to(
                     input_tensor.dtype
                 )  # for very small tensor, lower_k could be 0, kthvalue(0) will cause error
+
+                if self.symmetric:
+                    cv_new = max(cv_new_candidate, cvn_new_candidate.abs())
+                    cvn_new = -cv_new
+                else:
+                    cv_new = cv_new_candidate
+                    cvn_new = cvn_new_candidate
             elif (
                 self.qmode == "sawb" and self.num_bits == 4
             ):  # only works for PACT+sym for weights

@@ -3579,7 +3587,7 @@ def forward(self, input_tensor):

             else:  # i.e., minmax
                 cv_new = input_tensor.max()
-                cvn_new = input_tensor.min()
+                cvn_new = -cv_new if self.symmetric else input_tensor.min()

             if self.Niter == 0 and self.training:
                 # to avoid unintended bwd ops added to the graph, cause memory leak sometimes
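
Both quantizers.py fixes condense into one sketch. The function below is a hypothetical stand-in, not fms_mo code, and the mode names are examples:

import torch

def minmax_clip(x: torch.Tensor, qmode: str, symmetric: bool = False):
    # fix 1: endswith("sym") also matches mode names without the underscore
    symmetric = symmetric or qmode.endswith("sym")
    cv_new = x.max()
    # fix 2: in minmax mode, a symmetric quantizer mirrors the upper clip
    # value instead of taking an independent min
    cvn_new = -cv_new if symmetric else x.min()
    return cvn_new, cv_new

x = torch.tensor([-0.5, 0.2, 1.5])
print(minmax_clip(x, "minmax"))  # (tensor(-0.5000), tensor(1.5000))
print(minmax_clip(x, "maxsym"))  # (tensor(-1.5000), tensor(1.5000))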
