Skip to content

Commit fc926e6

Browse files
committed
Revert "Add helper functions for Q/DQ folding pass"
This reverts commit fd9eb28. Signed-off-by: Digant Desai <[email protected]>
1 parent e4658a3 commit fc926e6

File tree

5 files changed

+45
-77
lines changed

5 files changed

+45
-77
lines changed

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -16,32 +16,6 @@
1616
from torch.fx import GraphModule, Node
1717

1818

19-
def get_input_qparams(node: Node) -> dict[int, QuantArgs]:
20-
"""
21-
Get the input quantization parameters from a node, set by the 'FoldAndAnnotateQParamsPass'.
22-
Raises a ValueError if the node doesn't have any parameters set.
23-
"""
24-
if "input_qparams" not in node.meta.keys():
25-
raise ValueError(f"No input quantization parameter found in node {node}")
26-
input_qparams = cast(dict[int, QuantArgs], node.meta["input_qparams"])
27-
if len(input_qparams) == 0:
28-
raise ValueError(f"No input quantization parameter found in node {node}")
29-
return input_qparams
30-
31-
32-
def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
33-
"""
34-
Get the output quantization parameters from a node, set by the 'FoldAndAnnotateQParamsPass'.
35-
Raises a ValueError if the node doesn't have any parameters set.
36-
"""
37-
if "output_qparams" not in node.meta.keys():
38-
raise ValueError(f"No output quantization parameter found in node {node}")
39-
input_qparams = cast(dict[int, QuantArgs], node.meta["output_qparams"])
40-
if len(input_qparams) == 0:
41-
raise ValueError(f"No output quantization parameter found in node {node}")
42-
return input_qparams
43-
44-
4519
class FoldAndAnnotateQParamsPass(ExportPass):
4620
"""
4721
A pass that walks the graph and removes any DQ and Q nodes before and after the target

backends/arm/operators/op_add.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ def define_node(
7676
if output.dtype == ts.DType.INT8:
7777
# Scale output back to 8 bit
7878
# pyre-ignore
79-
tqutils.insert_rescale_op_to_int8(tosa_graph, add_output, scale_back, node)
79+
tqutils.insert_rescale_node_back_to_int8(
80+
tosa_graph, add_output, scale_back, node
81+
)
8082

8183

8284
@register_node_visitor

backends/arm/operators/op_max.py

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,11 @@
55

66
# pyre-unsafe
77

8-
from typing import List
8+
from typing import cast, List
99

1010
import executorch.backends.arm.tosa_quant_utils as tqutils
11+
1112
import serializer.tosa_serializer as ts
12-
from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
13-
get_input_qparams,
14-
)
1513
from executorch.backends.arm.operators.node_visitor import (
1614
NodeVisitor,
1715
register_node_visitor,
@@ -40,23 +38,30 @@ def define_node(
4038
) -> None:
4139
assert inputs[0].dtype == inputs[1].dtype
4240

43-
max_output = output
41+
input_qparams = cast(dict[int, tqutils.QuantArgs], node.meta["input_qparams"])
42+
min_output = output
43+
4444
if inputs[0].dtype == ts.DType.INT8:
45-
input_qparams = get_input_qparams(node)
46-
assert (
47-
len(input_qparams) == 2
48-
), f"Both inputs needs to have quantization information for {node}"
4945
# insert RESCALEs to int32
46+
x_scale = input_qparams[0].scale
47+
x_zp = input_qparams[0].zp
48+
49+
y_scale = input_qparams[1].scale
50+
y_zp = input_qparams[1].zp
51+
5052
assert (
51-
input_qparams[0] == input_qparams[1]
52-
), "Both inputs must have same quantization for MAX"
53+
x_zp == y_zp
54+
), "Different zp for inputs, MAX should be quantized with shared quantization!"
55+
assert (
56+
x_scale == y_scale
57+
), "Different scale for input, MAX should be quantized with shared quantization!"
5358

5459
operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
5560
tosa_graph, inputs, node
5661
)
5762

5863
output.shape = tosa_shape(output.shape, output.dim_order)
59-
max_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
64+
min_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
6065
else:
6166
operand_inputs = inputs
6267

@@ -66,9 +71,11 @@ def define_node(
6671
operand_inputs[0].name,
6772
operand_inputs[1].name,
6873
],
69-
[max_output.name],
74+
[min_output.name],
7075
)
7176

7277
if output.dtype == ts.DType.INT8:
7378
# insert RESCALE from int32 back to int8
74-
tqutils.insert_rescale_op_to_int8(tosa_graph, max_output, scale_back, node)
79+
tqutils.insert_rescale_node_back_to_int8(
80+
tosa_graph, min_output, scale_back, node
81+
)

backends/arm/operators/op_min.py

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,11 @@
55

66
# pyre-unsafe
77

8-
from typing import List
8+
from typing import cast, List
99

1010
import executorch.backends.arm.tosa_quant_utils as tqutils
1111

1212
import serializer.tosa_serializer as ts
13-
from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
14-
get_input_qparams,
15-
)
1613
from executorch.backends.arm.operators.node_visitor import (
1714
NodeVisitor,
1815
register_node_visitor,
@@ -41,16 +38,23 @@ def define_node(
4138
) -> None:
4239
assert inputs[0].dtype == inputs[1].dtype
4340

41+
input_qparams = cast(dict[int, tqutils.QuantArgs], node.meta["input_qparams"])
4442
min_output = output
43+
4544
if inputs[0].dtype == ts.DType.INT8:
46-
input_qparams = get_input_qparams(node)
47-
assert (
48-
len(input_qparams) == 2
49-
), f"Both inputs needs to have quantization information for {node}"
5045
# insert RESCALEs to int32
46+
x_scale = input_qparams[0].scale
47+
x_zp = input_qparams[0].zp
48+
49+
y_scale = input_qparams[1].scale
50+
y_zp = input_qparams[1].zp
51+
52+
assert (
53+
x_zp == y_zp
54+
), "Different zp for inputs, MIN should be quantized with shared quantization!"
5155
assert (
52-
input_qparams[0] == input_qparams[1]
53-
), "Both inputs must have same quantization for MIN"
56+
x_scale == y_scale
57+
), "Different scale for input, MIN should be quantized with shared quantization!"
5458

5559
operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
5660
tosa_graph, inputs, node
@@ -72,4 +76,6 @@ def define_node(
7276

7377
if output.dtype == ts.DType.INT8:
7478
# insert RESCALE from int32 back to int8
75-
tqutils.insert_rescale_op_to_int8(tosa_graph, min_output, scale_back, node)
79+
tqutils.insert_rescale_node_back_to_int8(
80+
tosa_graph, min_output, scale_back, node
81+
)

backends/arm/tosa_quant_utils.py

Lines changed: 4 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -57,19 +57,14 @@ def insert_rescale_ops_to_int32(
5757
the graph upstream for DQ nodes.
5858
"""
5959

60-
from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
61-
get_input_qparams,
62-
)
63-
6460
tensors = inputs.copy()
6561

6662
# Reshape tensor according to TOSA dim order
6763
for tensor in tensors:
6864
dim_order = tensor.dim_order
6965
tensor.shape = [tensor.shape[i] for i in dim_order]
7066

71-
input_qparams = get_input_qparams(node)
72-
qargs = input_qparams.values()
67+
qargs = list(cast(dict[int, QuantArgs], node.meta["input_qparams"]).values())
7368

7469
# Scale the int8 quantized input to a common scale in the integer
7570
# domain
@@ -89,7 +84,7 @@ def insert_rescale_ops_to_int32(
8984
return rescaled_nodes, min_scale
9085

9186

92-
def insert_rescale_op_to_int8(
87+
def insert_rescale_node_back_to_int8(
9388
tosa_graph: ts.TosaSerializer,
9489
last_tensor: TosaArg,
9590
scale: float,
@@ -107,14 +102,9 @@ def insert_rescale_op_to_int8(
107102
in the node meta dict as opposed to 'rescale_node_back_to_int8' which search
108103
the graph downstream for Q nodes.
109104
"""
110-
from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
111-
get_output_qparams,
112-
)
113-
114-
output_qparams = get_output_qparams(node)
115-
assert len(output_qparams) == 1, "More than one output not supported"
105+
assert len(node.meta["output_qparams"]) == 1
116106

117-
qargs_out = output_qparams[0]
107+
qargs_out = cast(dict[int, QuantArgs], node.meta["output_qparams"])[0]
118108
output_rescale_scale = scale / qargs_out.scale
119109

120110
# Rescale Back to INT8
@@ -146,17 +136,6 @@ def quantize_value(self, x):
146136
def dequantize_value(self, qx: int) -> float:
147137
return (qx - self.zp) * self.scale
148138

149-
def __eq__(self, other):
150-
if isinstance(other, QuantArgs):
151-
return (
152-
self.scale == other.scale
153-
and self.zp == other.zp
154-
and self.qmin == other.qmin
155-
and self.qmax == other.qmax
156-
and self.dtype == other.dtype
157-
)
158-
return False
159-
160139
@classmethod
161140
def from_operator(cls, op, args):
162141
if op in dq_q_ops:

0 commit comments

Comments (0)