Change Rounding Policy in Quant Module: replace torch.round (round-half-to-even) with torch.floor(x + 0.5) (round-half-up)

federicobrancasi · federicobrancasi · commit e2bc1b4bae53 · 2025-06-10T17:22:32.000+02:00
diff --git a/DeepQuant/QuantManipulation/DequantModifier.py b/DeepQuant/QuantManipulation/DequantModifier.py
@@ -5,7 +5,6 @@
 # Federico Brancasi <fbrancasi@ethz.ch>
 
 import torch.fx as fx
-import torch
 
 from DeepQuant.QuantManipulation.QuantDequantNodes import Dequant
 from DeepQuant.Utils.ConsoleFormatter import ConsoleColor as cc
@@ -76,22 +75,24 @@ def unifyLinearDequants(fxModel: fx.GraphModule, debug: bool = False) -> fx.Grap
         #         otherwise, rely on weight*input
         if biasDequantNode is not None:
             oldBiasDequantMod = fxModel.get_submodule(biasDequantNode.target)
-            dequantScale     = oldBiasDequantMod.scale
+            dequantScale = oldBiasDequantMod.scale
             dequantZeroPoint = oldBiasDequantMod.zeroPoint
-            dequantBitWidth  = oldBiasDequantMod.bitWidth
+            dequantBitWidth = oldBiasDequantMod.bitWidth
             oldDequantMod = oldBiasDequantMod
         else:
-            oldInputDequantMod  = fxModel.get_submodule(inputDequantNode.target)
+            oldInputDequantMod = fxModel.get_submodule(inputDequantNode.target)
             oldWeightDequantMod = fxModel.get_submodule(weightDequantNode.target)
-            dequantScale     = oldWeightDequantMod.scale * oldInputDequantMod.scale
+            dequantScale = oldWeightDequantMod.scale * oldInputDequantMod.scale
             # FCONTI: technically it should be:
             #         dZP = oWDM.zP * oIDM.zP - oWDM.scale * oIDM.zP * sum(weights)
             #         how to appropriately compute sum(weights)?
             #         for now we restrict ourselves to oIDM.zP = 0, so dZP = 0
             if debug and oldInputDequantMod.zeroPoint != 0.0:
-                print(f"Warning: input Dequant node for {node.target} has non-zero zero-point (unsupported). Expect wrong results!")
+                print(
+                    f"Warning: input Dequant node for {node.target} has non-zero zero-point (unsupported). Expect wrong results!"
+                )
             dequantZeroPoint = 0.0
-            dequantBitWidth  = 32 # FCONTI: this is simply a reasonable assumption: is there a less arbitrary one?
+            dequantBitWidth = 32  # FCONTI: this is simply a reasonable assumption: is there a less arbitrary one?
             oldDequantMod = oldWeightDequantMod
 
         for dnode in (inputDequantNode, weightDequantNode):
diff --git a/DeepQuant/QuantManipulation/QuantDequantNodes.py b/DeepQuant/QuantManipulation/QuantDequantNodes.py
@@ -47,7 +47,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 
         xScaled = x / self.scale
         xShifted = xScaled + self.zeroPoint
-        xRounded = torch.round(xShifted)
+        xRounded = torch.floor(xShifted + 0.5)
+
         if self.bitWidth is not None:
             xRounded = torch.clamp(xRounded, self.minVal, self.maxVal)
         return xRounded