Add opt-in checkEquivalence flag to brevitasToTrueQuant and its pipeline steps

federicobrancasi · federicobrancasi · commit 22775439cedf · 2025-06-11T21:35:29.000+02:00
diff --git a/DeepQuant/Export.py b/DeepQuant/Export.py
@@ -22,6 +22,7 @@ def brevitasToTrueQuant(
     exampleInput: torch.Tensor,
     exportPath: Optional[Union[str, Path]] = Path.cwd() / "Tests" / "ONNX",
     debug: bool = False,
+    checkEquivalence: bool = False,
 ) -> nn.Module:
     """
     Export a Brevitas model to an FX GraphModule with unrolled quantization operations.
@@ -35,16 +36,18 @@ def brevitasToTrueQuant(
 
     # Pipeline Step 2: Inject custom forward implementations
     transformedModel, transformedOutput = injectCustomForwards(
-        tracedModel, exampleInput, originalOutput, debug
+        tracedModel, exampleInput, originalOutput, debug, checkEquivalence
     )
 
     # Pipeline Step 3: Split quantization nodes
     splitModel, splitOutput = splitQuantNodes(
-        transformedModel, exampleInput, transformedOutput, debug
+        transformedModel, exampleInput, transformedOutput, debug, checkEquivalence
     )
 
     # Pipeline Step 4: Unify dequant nodes
-    unifiedModel, _ = mergeDequants(splitModel, exampleInput, splitOutput, debug)
+    unifiedModel, _ = mergeDequants(
+        splitModel, exampleInput, splitOutput, debug, checkEquivalence
+    )
 
     # Pipeline Step 5: Export to ONNX
     onnxFile, _ = exportToOnnx(unifiedModel, exampleInput, exportPath, debug)
diff --git a/DeepQuant/Pipeline/DequantUnify.py b/DeepQuant/Pipeline/DequantUnify.py
@@ -20,6 +20,7 @@ def mergeDequants(
     exampleInput: torch.Tensor,
     referenceOutput: torch.Tensor,
     debug: bool = False,
+    checkEquivalence: bool = False,
 ) -> Tuple[nn.Module, torch.Tensor]:
     """
     Unify dequantization nodes to enable integer-only computation.
@@ -78,12 +79,13 @@ def mergeDequants(
         output = unifiedModel(exampleInput)
 
     # FBRANCASI: Check output equivalence with a warning instead of error
-    if not torch.allclose(referenceOutput, output, atol=1e-5) and debug:
-        print(
-            cc.warning(
-                "Modification of Dequant Nodes may have changed the output slightly"
+    if checkEquivalence:
+        if not torch.allclose(referenceOutput, output, atol=1e-5) and debug:
+            print(
+                cc.warning(
+                    "Modification of Dequant Nodes may have changed the output slightly"
+                )
             )
-        )
 
     if debug:
         # FBRANCASI: Register hooks for the unified model and compare tensors
diff --git a/DeepQuant/Pipeline/Injection.py b/DeepQuant/Pipeline/Injection.py
@@ -25,6 +25,7 @@ def injectCustomForwards(
     exampleInput: torch.Tensor,
     referenceOutput: torch.Tensor,
     debug: bool = False,
+    checkEquivalence: bool = False,
 ) -> Tuple[nn.Module, torch.Tensor]:
     """Inject custom forward implementations into the model."""
     printer = GraphModulePrinter()
@@ -49,13 +50,14 @@ def injectCustomForwards(
     with torch.no_grad():
         output = fxModel(exampleInput)
 
-    if torch.allclose(referenceOutput, output, atol=1e-5):
-        if debug:
-            print(cc.success("Injection of New Modules: output is consistent"))
-    else:
-        raise RuntimeError(
-            cc.error("Injection of New Modules changed the output significantly")
-        )
+    if checkEquivalence:
+        if torch.allclose(referenceOutput, output, atol=1e-5):
+            if debug:
+                print(cc.success("Injection of New Modules: output is consistent"))
+        else:
+            raise RuntimeError(
+                cc.error("Injection of New Modules changed the output significantly")
+            )
 
     if debug:
         print(cc.header("2. Network after Injection of New Modules"))
diff --git a/DeepQuant/Pipeline/QuantSplit.py b/DeepQuant/Pipeline/QuantSplit.py
@@ -23,6 +23,7 @@ def splitQuantNodes(
     exampleInput: torch.Tensor,
     referenceOutput: torch.Tensor,
     debug: bool = False,
+    checkEquivalence: bool = False,
 ) -> Tuple[nn.Module, torch.Tensor]:
     """
     Split quantization nodes into separate Quant and Dequant nodes.
@@ -44,13 +45,14 @@ def splitQuantNodes(
     with torch.no_grad():
         output = splitModel(exampleInput)
 
-    if torch.allclose(referenceOutput, output, atol=1e-5):
-        if debug:
-            print(cc.success("Split of Quant Nodes: output is consistent"))
-    else:
-        raise RuntimeError(
-            cc.error("Split of Quant Nodes changed the output significantly")
-        )
+    if checkEquivalence:
+        if torch.allclose(referenceOutput, output, atol=1e-5):
+            if debug:
+                print(cc.success("Split of Quant Nodes: output is consistent"))
+        else:
+            raise RuntimeError(
+                cc.error("Split of Quant Nodes changed the output significantly")
+            )
 
     if debug:
         print(cc.header("3. Network after Split of Quant Nodes"))
diff --git a/Tests/TestConv.py b/Tests/TestConv.py
@@ -53,4 +53,4 @@ def deepQuantTestConv() -> None:
     torch.manual_seed(42)
     model = QuantConvNet().eval()
     sampleInput = torch.randn(1, 1, 28, 28)
-    brevitasToTrueQuant(model, sampleInput, debug=True)
+    brevitasToTrueQuant(model, sampleInput, debug=True, checkEquivalence=True)
diff --git a/Tests/TestLinear.py b/Tests/TestLinear.py
@@ -46,4 +46,4 @@ def deepQuantTestLinear() -> None:
     torch.manual_seed(42)
     model = QuantLinearNet().eval()
     sampleInput = torch.randn(1, 4, 16)
-    brevitasToTrueQuant(model, sampleInput, debug=True)
+    brevitasToTrueQuant(model, sampleInput, debug=True, checkEquivalence=True)
diff --git a/Tests/TestMHSA.py b/Tests/TestMHSA.py
@@ -56,4 +56,4 @@ def deepQuantTestMHSA() -> None:
     torch.manual_seed(42)
     model = QuantMHSANet(embedDim=16, numHeads=4).eval()
     sampleInput = torch.randn(10, 2, 16)
-    brevitasToTrueQuant(model, sampleInput)
+    brevitasToTrueQuant(model, sampleInput, checkEquivalence=True)
diff --git a/Tests/TestSimpleCNN.py b/Tests/TestSimpleCNN.py
@@ -82,4 +82,4 @@ def deepQuantTestSimpleCNN() -> None:
     torch.manual_seed(42)
     model = SimpleQuantCNN().eval()
     sampleInput = torch.randn(1, 1, 28, 28)
-    brevitasToTrueQuant(model, sampleInput, debug=True)
+    brevitasToTrueQuant(model, sampleInput, debug=True, checkEquivalence=True)