Add pass changing (un-)squeeze ops to view in TOSA lowering (#7784)

AdrianLundell · YIWENX14 · commit 8178183b8fee · 2025-01-28T14:20:31.000-08:00
Since squeeze and unsqueeze ops are special cases of the view op, it is enough to handle only view ops, which removes the need for the squeeze/unsqueeze node visitors.
diff --git a/backends/arm/_passes/annotate_channels_last_dim_order_pass.py b/backends/arm/_passes/annotate_channels_last_dim_order_pass.py
@@ -12,7 +12,6 @@
 from executorch.backends.arm._passes.arm_pass_utils import (
     create_node,
     get_first_fake_tensor,
-    get_node_arg,
     insert_q_dq_pair,
 )
 from executorch.backends.arm.tosa_quant_utils import dq_op, q_op
@@ -26,9 +25,8 @@
 # when lowering to TOSA, e.g. a passthrough_to_tosa._transpose will not affect
 # the edge IR graph but will be lowered to a TOSA-TRANSPOSE.
 lib = Library("passthrough_to_tosa", "DEF")
-# For operators that change the rank of the input, such as unsqueeze and squeeze, we may need
-# to switch dim_order before the opertation. Changing tosa_dim_order is not sufficient
-# as we also need transpose the data into the correct data format.
+# For certain operators we need the data in a specific data format. Changing tosa_dim_order
+# is not sufficient, as we also need to transpose the data.
 # By utilizing an edge IR passthrough operator we can keep the edge program in
 # channels-first/contiguous and get the desired behavior in the TOSA lowering.
 lib.define("_transpose(Tensor self, int[] dim_order) -> Tensor")
@@ -153,27 +151,6 @@ def insert_output_transpose(node, graph_module):
                 q_params = node.args[0].args[1:]
                 insert_q_dq_pair(graph_module.graph, node, q_params)
 
-    @staticmethod
-    def _insert_squeeze_transpose(
-        input_shape, output_shape, node, input_node, graph_module
-    ):
-        nhwc_to_nhwc = len(input_shape) == 4 and len(output_shape) <= 3
-
-        if nhwc_to_nhwc and AnnotateChannelsLastDimOrder.memory_format_differs(
-            input_shape
-        ):
-            AnnotateChannelsLastDimOrder.insert_input_transpose(
-                node, input_node, graph_module
-            )
-
-    @staticmethod
-    def _insert_unsqueeze_transpose(input_shape, output_shape, node, graph_module):
-        nchw_to_nhwc = len(input_shape) == 3 and len(output_shape) == 4
-        if nchw_to_nhwc and AnnotateChannelsLastDimOrder.memory_format_differs(
-            output_shape
-        ):
-            AnnotateChannelsLastDimOrder.insert_output_transpose(node, graph_module)
-
     @staticmethod
     def _insert_view_transpose(
         input_shape, output_shape, node, input_node, graph_module
@@ -199,8 +176,6 @@ def insert_tosa_transposes(self, graph_module: torch.fx.GraphModule):
         """
         Transposes are needed for operators transforming the input to a different rank, as 4D-tensors are assumed to be in NHWC-format, whereas all other are in NCHW format.
         This is relevant for the following cases:
-        - squeeze:     4D -> <4D
-        - unsqueeze:   3D ->  4D
         - view:       <4D ->  4D
         - view:        4D -> <4D
         Additionally, a 4D->4D view operation acting on the channel dimension currently needs to be performed in NCHW format, leadning to one extra input and output transpose for this case.
@@ -214,27 +189,6 @@ def insert_tosa_transposes(self, graph_module: torch.fx.GraphModule):
             if node.op != "call_function":
                 continue
 
-            if node.target == exir_ops.edge.aten.squeeze_copy.dims:
-                input_node = node.args[0]
-                input_shape = input_node.meta["val"].shape
-                output_shape = node.meta["val"].shape
-
-                self._insert_squeeze_transpose(
-                    input_shape, output_shape, node, input_node, graph_module
-                )
-
-            elif node.target == exir_ops.edge.aten.unsqueeze_copy.default:
-                input_node = get_node_arg(node.args, 0, default_value=False)
-                if input_node:
-                    input_shape = input_node.meta["val"].shape
-                else:
-                    input_shape = ()
-                output_shape = node.meta["val"].shape
-
-                self._insert_unsqueeze_transpose(
-                    input_shape, output_shape, node, graph_module
-                )
-
             elif node.target == exir_ops.edge.aten.view_copy.default:
                 input_node = node.args[0]
                 input_shape = input_node.meta["val"].shape
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -21,6 +21,9 @@
 from executorch.backends.arm._passes.convert_split_to_slice import (
     ConvertSplitToSlicePass,
 )
+from executorch.backends.arm._passes.convert_squeezes_to_view import (
+    ConvertSqueezesToViewPass,
+)
 from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass
 from executorch.backends.arm._passes.decompose_layernorm_pass import (
     DecomposeLayerNormPass,
@@ -100,6 +103,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSelectPass())
+        self.add_pass(ConvertSqueezesToViewPass())
 
         self.add_pass(AnnotateChannelsLastDimOrder())
 
@@ -135,6 +139,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
         self.add_pass(DecomposeSelectPass())
+        self.add_pass(ConvertSqueezesToViewPass())
 
         self.add_pass(AnnotateChannelsLastDimOrder())
 
diff --git a/backends/arm/_passes/convert_squeezes_to_view.py b/backends/arm/_passes/convert_squeezes_to_view.py
@@ -0,0 +1,30 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+class ConvertSqueezesToViewPass(ExportPass):
+    """
+    Replaces squeeze/unsqueeze operators with view. These are simply special cases of the view op, so removing them gives us fewer cases to handle in the node visitors.
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in [
+            exir_ops.edge.aten.squeeze_copy.dims,
+            exir_ops.edge.aten.unsqueeze_copy.default,
+        ]:
+            return super().call_operator(op, args, kwargs, meta)
+
+        x = args[0]
+        shape = meta["val"].size()
+        view_args = (x, list(shape))
+        return super().call_operator(
+            exir_ops.edge.aten.view_copy.default, view_args, kwargs, meta
+        )
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
@@ -30,14 +30,12 @@
     op_rsqrt,
     op_sigmoid,
     op_slice,
-    op_squeeze,
     op_sub,
     op_sum,
     op_table,
     op_tanh,
     op_to_copy,
     op_transpose,
-    op_unsqueeze,
     op_upsample_nearest2d,
     op_view,
 )
diff --git a/backends/arm/operators/op_squeeze.py b/backends/arm/operators/op_squeeze.py
diff --git a/backends/arm/operators/op_unsqueeze.py b/backends/arm/operators/op_unsqueeze.py