
Commit fbf94e0

fix: Added recompute_clips var to Qmax for eval mode
Signed-off-by: Brandon Groth <[email protected]>
Parent: 454391f

1 file changed: +2 −1 lines changed


fms_mo/quant_refactor/quantizers_new.py

Lines changed: 2 additions & 1 deletion

@@ -2913,6 +2913,7 @@ def __init__(
         self.perCh = perCh
         self.extend_act_range = extend_act_range
         self.perGp = perGp
+        self.recompute_clips = False
 
         self.set_quantizer()
 
@@ -2997,7 +2998,7 @@ def forward(self, input):
         if len(clipvaln_new.shape) == 0:
             clipvaln_new = clipvaln_new.unsqueeze(dim=0)
 
-        if self.Niter == 0 and self.training:
+        if (self.Niter == 0 and self.training) or self.recompute_clips:
             # to avoid unintended bwd ops added to the graph, cause memory leak sometimes
             with torch.no_grad():
                 # similar to fill_(), will not change id(self.clip_val) but update the values
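
Context for the change: before this commit, the clip-value update block in forward() ran only when self.Niter == 0 and self.training, so a Qmax quantizer switched to eval mode kept whatever clip values it last computed during training. The new recompute_clips flag (default False) makes the same torch.no_grad() update path reachable in eval mode on demand.

A minimal, self-contained sketch of the gating pattern this commit introduces. The class below is a hypothetical stand-in written for illustration only; it mirrors the condition in the diff but is not the real Qmax API or its constructor:

import torch
import torch.nn as nn

class ToyClipQuantizer(nn.Module):
    """Hypothetical stand-in showing the recompute_clips gating from the commit."""

    def __init__(self):
        super().__init__()
        self.clip_val = nn.Parameter(torch.tensor(1.0))
        self.Niter = 0
        self.recompute_clips = False  # the flag this commit adds

    def forward(self, x):
        clipval_new = x.abs().max()  # stand-in for a freshly derived clip value
        # The commit's condition: update on the first training step,
        # OR whenever recompute_clips is set (e.g. during eval).
        if (self.Niter == 0 and self.training) or self.recompute_clips:
            # no_grad avoids adding unintended bwd ops to the graph,
            # matching the pattern in the diff above
            with torch.no_grad():
                self.clip_val.copy_(clipval_new)
        self.Niter += 1
        return x.clamp(-self.clip_val, self.clip_val)

m = ToyClipQuantizer()
m.eval()                  # self.training is False: the old condition never fires
m.recompute_clips = True  # the new flag forces the clip refresh anyway
_ = m(torch.randn(8))
print(m.clip_val.item())  # clip_val now reflects the eval-time input

Without the flag, the call in eval mode would leave clip_val at its initial value, which is exactly the stale-clips situation the commit addresses.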
