
Commit 4c80294

Add support for concat q/dq folding
This is a special case where node.args can be lists with many incoming dq-nodes.

Signed-off-by: Oscar Andersson <[email protected]>
Change-Id: Icf511a8bdeaaffb597b18455ab7f1fbd947ce3ca
1 parent 45c1976 · commit 4c80294
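To illustrate the special case the commit message describes (a sketch added for this write-up, not part of the commit): when a module calls torch.cat, the traced node's first argument is a Python list of producer Nodes, whereas most other targeted ops receive each input as an individual Node. The toy CatModule below is hypothetical:

```python
import torch
import torch.fx

class CatModule(torch.nn.Module):  # toy module, written for this note
    def forward(self, x, y):
        return torch.cat([x, y], dim=0)

gm = torch.fx.symbolic_trace(CatModule())
for node in gm.graph.nodes:
    if node.op == "call_function":
        # For cat, node.args[0] is a *list* of Nodes ([x, y]); most other
        # targeted ops pass each input as an individual Node argument.
        print(node.target, node.args, node.kwargs)
```

Each element of that list can be produced by its own dq-node, which is why the pass below folds and compares qparams across the whole list rather than per argument.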

File tree

2 files changed: +47, -31 lines

backends/arm/_passes/arm_pass_manager.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -90,14 +90,15 @@ def transform_to_backend_pipeline(
         self.add_pass(
             FoldAndAnnotateQParamsPass(
                 [
-                    exir_ops.edge.aten.minimum.default,
-                    exir_ops.edge.aten.maximum.default,
                     exir_ops.edge.aten.add.Tensor,
                     exir_ops.edge.aten.avg_pool2d.default,
+                    exir_ops.edge.aten.cat.default,
                     exir_ops.edge.aten.convolution.default,
                     exir_ops.edge.aten.exp.default,
                     exir_ops.edge.aten.full.default,
                     exir_ops.edge.aten.log.default,
+                    exir_ops.edge.aten.maximum.default,
+                    exir_ops.edge.aten.minimum.default,
                     exir_ops.edge.aten.reciprocal.default,
                     exir_ops.edge.aten.rsqrt.default,
                     exir_ops.edge.aten.sigmoid.default,
```

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 44 additions & 29 deletions
```diff
@@ -80,6 +80,46 @@ def __init__(self, targeted_ops: Iterable[EdgeOpOverload]) -> None:
         super().__init__()
         self.targeted_ops = targeted_ops
 
+    def fold_and_annotate_arg(
+        self, graph_module: GraphModule, node: Node, arg_list: list[Node], i: int
+    ):
+        input_qparams = None
+        nodes_to_remove = set()
+        for arg in arg_list:
+            if not isinstance(arg, Node):
+                return
+            """
+            Make sure arg has requires_grad set to False
+            For parameters that are not quantized, sometimes (i.e. convolution)
+            the Parameter(FakeTensor(...)) has requires_grad set to True, which
+            causes the retracing of the graph to fail with:
+
+            E       RuntimeError: isDifferentiableType(variable.scalar_type()) INTERNAL ASSERT FAILED at "/Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/functions/utils.h":74, please report a bug to PyTorch.
+            E
+            E       While executing %aten_convolution_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%quantized_decomposed_quantize_per_tensor_default, %b__frozen_param0, %p__param_constant1, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {})
+            E       Original traceback:
+            E         File "/Users/perast01/src/executorch/backends/arm/test/ops/test_conv2d.py", line 110, in forward
+            E           x = conv(x)
+            """
+            if arg.op == "placeholder":
+                arg.meta["val"].requires_grad = False
+
+            arg_quant_params = None
+            if arg.target == dq_op:
+                arg_quant_params = QuantArgs.from_operator(arg.target, arg.args)
+                # add arg to nodes_to_remove to fold the dq-node
+                nodes_to_remove.add(arg)
+            if input_qparams is not None and input_qparams != arg_quant_params:
+                # Two args are quantized differently
+                raise RuntimeError("Input qparams does not match!")
+            input_qparams = arg_quant_params
+        if input_qparams is not None:
+            node.meta["input_qparams"][i] = input_qparams
+        for n in nodes_to_remove:
+            assert n.target == dq_op
+            n.replace_all_uses_with(n.args[0])
+            graph_module.graph.erase_node(n)
+
     def call(self, graph_module: GraphModule) -> PassResult:
 
         # Loop over the graph nodes and find any node in the 'targeted_ops' list.
@@ -98,36 +138,11 @@ def call(self, graph_module: GraphModule) -> PassResult:
             n.meta["input_qparams"] = {}
             n.meta["output_qparams"] = {}
             for i, arg in enumerate(n.args):
-                if not isinstance(arg, Node):
-                    continue
-
-                # Make sure arg has requires_grad set to False
-                # For parameters that are not quantized, sometimes (i.e. convolution)
-                # the Parameter(FakeTensor(...)) has requires_grad set to True, which
-                # causes the retracing of the graph to fail with:
-                #
-                # E       RuntimeError: isDifferentiableType(variable.scalar_type()) INTERNAL ASSERT FAILED at "/Users/runner/work/pytorch/pytorch/pytorch/torch/csrc/autograd/functions/utils.h":74, please report a bug to PyTorch.
-                # E
-                # E       While executing %aten_convolution_default : [num_users=1] = call_function[target=executorch.exir.dialects.edge._ops.aten.convolution.default](args = (%quantized_decomposed_quantize_per_tensor_default, %b__frozen_param0, %p__param_constant1, [1, 1], [0, 0], [1, 1], False, [0, 0], 1), kwargs = {})
-                # E       Original traceback:
-                # E         File "/Users/perast01/src/executorch/backends/arm/test/ops/test_conv2d.py", line 110, in forward
-                # E           x = conv(x)
-                #
-                if arg.op == "placeholder":
-                    arg.meta["val"].requires_grad = False
-
-                if arg.target != dq_op:
-                    continue
-
-                # arg.target for argument i is a dequant node, extract the information
-                n.meta["input_qparams"][i] = QuantArgs.from_operator(
-                    arg.target, arg.args
-                )
+                if isinstance(arg, list):
+                    self.fold_and_annotate_arg(graph_module, n, arg, i)
 
-                # arg.args[0] is the tensor input, replace the input usage
-                tensor_input = cast(Node, arg.args[0])
-                n.replace_input_with(arg, tensor_input)
-                graph_module.graph.erase_node(arg)
+                elif isinstance(arg, Node):
+                    self.fold_and_annotate_arg(graph_module, n, [arg], i)
 
             # Copy the users, since we are modifying it.
             users_copy = copy.copy(n.users)
```
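For readers unfamiliar with torch.fx graph surgery, here is a minimal, self-contained sketch of the fold step the pass performs: rewire a node's users to its tensor input with replace_all_uses_with, then erase it. This is not ExecuTorch code; fake_dq is a hypothetical passthrough standing in for the real dequantize op:

```python
import torch
import torch.fx

@torch.fx.wrap  # keep fake_dq as a graph node instead of tracing through it
def fake_dq(x):
    # Hypothetical passthrough standing in for a dequantize op.
    return x

class Model(torch.nn.Module):
    def forward(self, x, y):
        return torch.cat([fake_dq(x), fake_dq(y)], dim=0)

gm = torch.fx.symbolic_trace(Model())
# Fold every fake_dq node: rewire its users to its tensor input, then erase
# it, mirroring the pass's replace_all_uses_with / erase_node sequence.
for node in list(gm.graph.nodes):
    if node.op == "call_function" and node.target is fake_dq:
        node.replace_all_uses_with(node.args[0])
        gm.graph.erase_node(node)
gm.recompile()
print(gm.graph)  # cat now consumes x and y directly
print(gm(torch.ones(2), torch.zeros(2)))
```

The real pass additionally records the dq-nodes' quantization parameters in node.meta["input_qparams"][i] before erasing them, and raises if two inputs feeding the same list argument carry different qparams.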
