Conditionally support expand_copy in XNNPACK delegate

GregoryComer · GregoryComer · commit 7ce711fbe339 · 2025-01-27T00:42:15.000-08:00
diff --git a/backends/xnnpack/_passes/__init__.py b/backends/xnnpack/_passes/__init__.py
@@ -18,6 +18,7 @@
     ConvertToUpsampleBilinear2d,
 )
 from executorch.backends.xnnpack._passes.decompose_cat import DecomposeConcatenate
+from executorch.backends.xnnpack._passes.expand_to_view_pass import ExpandToViewPass
 from executorch.backends.xnnpack._passes.fuse_activation_pass import FuseActivationPass
 from executorch.backends.xnnpack._passes.fuse_batch_norm_with_conv import (
     FuseBatchNormWithConvPass,
@@ -62,6 +63,7 @@ def __init__(
                 ConvertToLinearPass,
                 ConvertToSDPAPass,
                 ConstPropPass,
+                ExpandToViewPass,
                 FuseBatchNormWithConvPass,
                 FuseActivationPass,
                 DecomposeConcatenate,
diff --git a/backends/xnnpack/_passes/expand_to_view_pass.py b/backends/xnnpack/_passes/expand_to_view_pass.py
@@ -0,0 +1,76 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch
+from executorch.backends.xnnpack.utils.utils import get_input_node
+from executorch.exir.dialects._ops import ops as exir_ops
+
+from executorch.exir.pass_base import ExportPass, PassResult
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
+
+class ExpandToViewPass(ExportPass):
+    """
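+    Torch expand_copy can be used as an alternative to unsqueeze. This pass replaces expand_copy nodes
+    that only insert one or more singleton dimensions with view_copy nodes given the same size argument.
+    NOTE(review): expand's -1 keeps a dim while view's -1 infers one; confirm sizes with several -1s.
+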
+    Example:
+    Before Pass:
+        expand: "f32" = torch.ops.aten.expand_copy.default(x, (1, -1));
+
+    After Pass:
+        view: "f32" = torch.ops.aten.view_copy.default(x, (1, -1));
+    """
+
+    @staticmethod
+    def can_transform_expand_node(node: torch.fx.Node) -> bool:
+        # The node can be converted to a view if the expand only inserts singleton dimensions and
+        # does not modify any existing dimensions.
+        in_shape = get_input_node(node, 0).meta["val"].shape
+        out_shape = node.meta["val"].shape
+
+        i = 0  # in-shape index
+        j = 0  # out-shape index
+        while j < len(out_shape):
+            if i >= len(in_shape):  # In-shape exhausted while out dims remain
+                return False
+            elif in_shape[i] == out_shape[j]:  # Dims match
+                i += 1
+                j += 1
+            elif out_shape[j] == 1:  # Inserted singleton dim
+                j += 1
+            else:  # Dim mismatch (in_shape[i] != out_shape[j])
+                return False
+
+        return True
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        gm = graph_module
+        for node in gm.graph.nodes:
+            if (
+                node.op == "call_function"
+                and node.target == exir_ops.edge.aten.expand_copy.default
+                and ExpandToViewPass.can_transform_expand_node(node)
+            ):
+                with gm.graph.inserting_after(node):
+                    view_node = gm.graph.create_node(
+                        "call_function",
+                        target=exir_ops.edge.aten.view_copy.default,
+                        args=(node.args[0], node.args[1]),
+                        kwargs=node.kwargs,
+                    )
+
+                    node.replace_all_uses_with(view_node)
+                    gm.graph.erase_node(node)
+
+        gm.recompile()
+        new_gm = super().call(gm).graph_module
+        return PassResult(new_gm, True)
diff --git a/backends/xnnpack/partition/config/__init__.py b/backends/xnnpack/partition/config/__init__.py
@@ -25,6 +25,7 @@
     ConstantPadConfig,
     DeQuantizedPerTensorConfig,
     DivConfig,
+    ExpandCopyConfig,
     FloorConfig,
     HardswishConfig,
     # EluConfig,
@@ -77,6 +78,7 @@
     ClampConfig,
     DivConfig,
     # EluConfig, # Waiting for PyTorch Pin Update
+    ExpandCopyConfig,
     FloorConfig,
     HardtanhConfig,
     HardswishConfig,
diff --git a/backends/xnnpack/partition/config/generic_node_configs.py b/backends/xnnpack/partition/config/generic_node_configs.py
@@ -10,6 +10,7 @@
 from typing import cast, List, Optional
 
 import torch
+from executorch.backends.xnnpack._passes.expand_to_view_pass import ExpandToViewPass
 from executorch.backends.xnnpack.partition.config.xnnpack_config import (
     ConfigPrecisionType,
     XNNPartitionerConfig,
@@ -225,6 +226,29 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]:
         return torch.ops.aten.elu.default
 
 
+class ExpandCopyConfig(GenericNodePartitionerConfig):
+    target_name = "expand_copy.default"
+
+    def supported_precision_types(self) -> List[ConfigPrecisionType]:
+        return [ConfigPrecisionType.FP32]
+
+    def get_original_aten(self) -> Optional[torch._ops.OpOverload]:
+        return torch.ops.aten.expand_copy.default
+
+    def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
+        """
+        Only partition expand_copy nodes that can be converted to view_copy (insertion of
+        singleton dims).
+        """
+        if not self.check_common_constraints(node, ep):
+            return False
+
+        if not ExpandToViewPass.can_transform_expand_node(node):
+            why(node, reason="only insertion of singleton dims is supported")
+            return False
+        return True
+
+
 class SoftmaxConfig(GenericNodePartitionerConfig):
     target_name = "_softmax.default"
 
diff --git a/backends/xnnpack/test/ops/test_expand.py b/backends/xnnpack/test/ops/test_expand.py
@@ -0,0 +1,69 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.xnnpack.test.tester import Tester
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+class TestExpand(unittest.TestCase):
+    class Expand(torch.nn.Module):
+        def __init__(self, out_shape):
+            super().__init__()
+            self.out_shape = out_shape
+
+        def forward(self, x):
+            return x.expand(self.out_shape)
+
+    def test_fp32_insert_dim(self):
+        inputs = (torch.randn(8, 12),)
+        new_shapes = (
+            (1, 8, 12),
+            (1, 1, 8, 12),
+        )
+
+        for new_shape in new_shapes:
+            (
+                Tester(self.Expand(new_shape), tuple(inputs))
+                .export()
+                .check_node_count({torch.ops.aten.expand.default: 1})
+                .to_edge_transform_and_lower()
+                .check_node_count(
+                    {
+                        exir_ops.edge.aten.expand_copy.default: 0,
+                        exir_ops.edge.aten.view_copy.default: 0,
+                        torch.ops.higher_order.executorch_call_delegate: 1,
+                    }
+                )
+                .to_executorch()
+                .run_method_and_compare_outputs()
+            )
+
+    def test_fp32_unsupported_expand(self):
+        inputs = (torch.randn(1, 8, 12),)
+        new_shapes = (
+            (2, 8, 12),
+            (1, 2, 8, 12),
+            (2, 1, 8, 12),
+        )
+
+        for new_shape in new_shapes:
+            (
+                Tester(self.Expand(new_shape), tuple(inputs))
+                .export()
+                .check_node_count({torch.ops.aten.expand.default: 1})
+                .to_edge_transform_and_lower()
+                .check_node_count(
+                    {
+                        exir_ops.edge.aten.expand_copy.default: 1,
+                        exir_ops.edge.aten.view_copy.default: 0,
+                    }
+                )
+                .to_executorch()
+                .run_method_and_compare_outputs()
+            )
diff --git a/backends/xnnpack/test/passes/test_expand_to_view_pass.py b/backends/xnnpack/test/passes/test_expand_to_view_pass.py
@@ -0,0 +1,71 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.xnnpack._passes.expand_to_view_pass import ExpandToViewPass
+from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+class TestExpandToViewPass(unittest.TestCase):
+    PassStage = RunPasses([ExpandToViewPass])
+
+    class Expand(torch.nn.Module):
+        def __init__(self, out_shape):
+            super().__init__()
+            self.out_shape = out_shape
+
+        def forward(self, x):
+            return x.expand(self.out_shape)
+
+    def test_fp32_insert_dim(self):
+        inputs = (torch.randn(8, 12),)
+        new_shapes = (
+            (1, 8, 12),
+            (1, 1, 8, 12),
+        )
+
+        for new_shape in new_shapes:
+            (
+                Tester(self.Expand(new_shape), tuple(inputs))
+                .export()
+                .to_edge()
+                .check_node_count({exir_ops.edge.aten.expand_copy.default: 1})
+                .run_passes(self.PassStage)
+                .check_node_count(
+                    {
+                        exir_ops.edge.aten.expand_copy.default: 0,
+                        exir_ops.edge.aten.view_copy.default: 1,
+                    }
+                )
+                .run_method_and_compare_outputs()
+            )
+
+    def test_fp32_unsupported_expand(self):
+        inputs = (torch.randn(1, 8, 12),)
+        new_shapes = (
+            (2, 8, 12),
+            (1, 2, 8, 12),
+            (2, 1, 8, 12),
+        )
+
+        for new_shape in new_shapes:
+            (
+                Tester(self.Expand(new_shape), tuple(inputs))
+                .export()
+                .to_edge()
+                .check_node_count({exir_ops.edge.aten.expand_copy.default: 1})
+                .run_passes(self.PassStage)
+                .check_node_count(
+                    {
+                        exir_ops.edge.aten.expand_copy.default: 1,
+                        exir_ops.edge.aten.view_copy.default: 0,
+                    }
+                )
+                .run_method_and_compare_outputs()
+            )