pytorch
diff --git a/‎CMakeLists.txt‎
Lines changed: 8 additions & 10 deletions b/‎CMakeLists.txt‎
Lines changed: 8 additions & 10 deletions
diff --git a/‎backends/apple/mps/CMakeLists.txt‎
Lines changed: 0 additions & 4 deletions b/‎backends/apple/mps/CMakeLists.txt‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎backends/cadence/CMakeLists.txt‎
Lines changed: 0 additions & 4 deletions b/‎backends/cadence/CMakeLists.txt‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎backends/cadence/aot/TARGETS‎
Lines changed: 1 addition & 0 deletions b/‎backends/cadence/aot/TARGETS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/cadence/aot/graph_builder.py‎
Lines changed: 5 additions & 0 deletions b/‎backends/cadence/aot/graph_builder.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎backends/cadence/aot/pass_utils.py‎
Lines changed: 31 additions & 0 deletions b/‎backends/cadence/aot/pass_utils.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎backends/cadence/aot/remove_ops.py‎
Lines changed: 43 additions & 54 deletions b/‎backends/cadence/aot/remove_ops.py‎
Lines changed: 43 additions & 54 deletions
diff --git a/‎backends/cadence/aot/simplify_ops.py‎
Lines changed: 38 additions & 1 deletion b/‎backends/cadence/aot/simplify_ops.py‎
Lines changed: 38 additions & 1 deletion
diff --git a/‎backends/cadence/aot/tests/test_remove_ops_passes.py‎
Lines changed: 27 additions & 0 deletions b/‎backends/cadence/aot/tests/test_remove_ops_passes.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎backends/cadence/aot/tests/test_simplify_ops_passes.py‎
Lines changed: 35 additions & 1 deletion b/‎backends/cadence/aot/tests/test_simplify_ops_passes.py‎
Lines changed: 35 additions & 1 deletion
@@ -48,6 +48,9 @@ project(executorch)
 # MARK: - Start EXECUTORCH_H12025_BUILD_MIGRATION --------------------------------------------------
 
 include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
+include(CMakeDependentOption)
+include(ExternalProject)
 
 if(NOT CMAKE_CXX_STANDARD)
   set(CMAKE_CXX_STANDARD 17)
@@ -64,10 +67,14 @@ if(NOT CMAKE_BUILD_TYPE)
 endif()
 announce_configured_options(CMAKE_BUILD_TYPE)
 
+if(NOT PYTHON_EXECUTABLE)
+  resolve_python_executable()
+endif()
+announce_configured_options(PYTHON_EXECUTABLE)
+
 announce_configured_options(CMAKE_CXX_COMPILER_ID)
 announce_configured_options(CMAKE_TOOLCHAIN_FILE)
 announce_configured_options(BUCK2)
-announce_configured_options(PYTHON_EXECUTABLE)
 
 load_build_preset()
 include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
@@ -77,10 +84,6 @@ print_configured_options()
 
 # MARK: - End EXECUTORCH_H12025_BUILD_MIGRATION ----------------------------------------------------
 
-include(tools/cmake/Utils.cmake)
-include(CMakeDependentOption)
-include(ExternalProject)
-
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 
 # Setup RPATH.
@@ -256,11 +259,6 @@ if(EXECUTORCH_BUILD_TESTS)
   include(CTest)
 endif()
 
-if(NOT PYTHON_EXECUTABLE)
-  resolve_python_executable()
-endif()
-message(STATUS "Using python executable '${PYTHON_EXECUTABLE}'")
-
 # TODO(dbort): Fix these warnings and remove this flag.
 set(_common_compile_options -Wno-deprecated-declarations -fPIC)
 
 
@@ -18,10 +18,6 @@ endif()
 
 include(${EXECUTORCH_ROOT}/tools/cmake/Utils.cmake)
 
-if(NOT PYTHON_EXECUTABLE)
-  resolve_python_executable()
-endif()
-
 set(_common_compile_options -Wno-deprecated-declarations)
 set(_common_include_directories ${EXECUTORCH_ROOT}/..)
 
 
@@ -30,10 +30,6 @@ add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS)
 if(EXECUTORCH_CADENCE_CPU_RUNNER)
   include(${EXECUTORCH_ROOT}/tools/cmake/Codegen.cmake)
 
-  if(NOT PYTHON_EXECUTABLE)
-  resolve_python_executable()
-  endif()
-
   set(_common_compile_options -Wno-deprecated-declarations -fPIC)
 
   # Find prebuilt libraries. executorch package should contain portable_ops_lib,
 
@@ -367,6 +367,7 @@ python_unittest(
         "fbsource//third-party/pypi/parameterized:parameterized",
         "//caffe2:torch",
         "//executorch/backends/cadence/aot:compiler",
+        "//executorch/backends/cadence/aot:graph_builder",
         "//executorch/backends/cadence/aot:ops_registrations",
         "//executorch/backends/cadence/aot:pass_utils",
         "//executorch/backends/cadence/aot:simplify_ops",
 
@@ -96,6 +96,11 @@ def call_submodule(
     ) -> PassResult:
         return ExportPass().call(graph_module)
 
+    def call_getitem(
+        self, value: ProxyValue, key: int, meta: Optional[NodeMetadata] = None
+    ) -> ProxyValue:
+        return super().call_getitem(value, key, meta or NodeMetadata({}))
+
     def _fx(
         self,
         kind: str,
 
@@ -157,3 +157,34 @@ def nodes_not_adjacent_in_gm(
         if node.next.target == succ_target:
             return False
     return True
+
+
+def get_arg(
+    node: torch.fx.Node,
+    arg_index: int,
+    kwarg_name: str,
+    *,
+    default: torch.fx.node.Argument = None,
+) -> torch.fx.node.Argument:
+    """
+    Get the arg at arg_index or, failing that, the kwarg named kwarg_name of the
+    node. If neither is found, return default.
+    """
+    if arg_index < len(node.args):
+        return node.args[arg_index]
+    elif kwarg_name in node.kwargs:
+        return node.kwargs[kwarg_name]
+    else:
+        return default
+
+
+def set_arg(
+    node: torch.fx.Node, arg_index: int, kwarg_name: str, value: torch.fx.node.Argument
+) -> None:
+    """
+    Set the arg at arg_index if it exists; otherwise set the kwarg named kwarg_name.
+    """
+    if arg_index < len(node.args):
+        node.update_arg(arg_index, value)
+    else:
+        node.update_kwarg(kwarg_name, value)
@@ -25,7 +25,9 @@
 import torch.fx
 from executorch.backends.cadence.aot.pass_utils import (
     CadencePassAttribute,
+    get_arg,
     register_cadence_pass,
+    set_arg,
 )
 
 from executorch.backends.cadence.aot.simplify_ops import SimplifySliceOpPass
@@ -37,7 +39,7 @@
 from executorch.exir.pass_manager import PassManager, PassType
 from executorch.exir.passes import dead_code_elimination_pass
 from executorch.exir.passes.spec_prop_pass import SpecPropPass
-from torch.fx.node import Argument
+from torch.fx.node import Argument, Node
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
@@ -771,65 +773,52 @@ def remove_branched(
 
 
 class RemoveCatFromSliceCopyPass(ExportPass):
-    def _remove_unused_cat(  # noqa: C901
-        self, graph_module: torch.fx.GraphModule
-    ) -> None:
-        slice_copy_nodes = [
-            node
-            for node in graph_module.graph.nodes
-            if node.target == exir_ops.edge.aten.slice_copy.Tensor
-        ]
-        for slice_copy_node in slice_copy_nodes:
-            slice_dim, start_idx, end_idx, step = 0, 0, float("inf"), 1
-            input_node, *other_args = slice_copy_node.args
-            if len(other_args) >= 1:
-                slice_dim = other_args[0]
-            if len(other_args) >= 2:
-                start_idx = other_args[1]
-            if len(other_args) >= 3:
-                end_idx = other_args[2]
-            if len(other_args) >= 4:
-                step = other_args[3]
-            if step != 1:
-                continue
-            slice_copy_dtype = slice_copy_node.meta["val"].dtype
-            if input_node.target != exir_ops.edge.aten.cat.default:
-                continue
-            cat_dtype = input_node.meta["val"].dtype
-            if slice_copy_dtype != cat_dtype:
+    """
+    Simplifies cat->slice_copy chains where one of the cat inputs can be directly passed
+    to the slice_copy.
+    """
+
+    def _remove_unused_cat(self, graph_module: torch.fx.GraphModule) -> None:
+        for slice_copy_node in graph_module.graph.find_nodes(
+            op="call_function", target=exir_ops.edge.aten.slice_copy.Tensor
+        ):
+            cat_node = cast(Node, get_arg(slice_copy_node, 0, "input"))
+            slice_dim = cast(int, get_arg(slice_copy_node, 1, "dim", default=0))
+            start_idx = cast(int, get_arg(slice_copy_node, 2, "start", default=None))
+            end_idx = cast(int, get_arg(slice_copy_node, 3, "end", default=None))
+            step = cast(int, get_arg(slice_copy_node, 4, "step", default=1))
+
+            if cat_node.target != exir_ops.edge.aten.cat.default or step != 1:
                 continue
-            cat_dim = input_node.args[1:]
-            if len(cat_dim) == 0:
-                cat_dim = 0
+
+            # Make sure cat and slice happen on the same dimension.
+            cat_dim = cast(int, get_arg(cat_node, 1, "dim", default=0))
             if cat_dim != slice_dim:
                 continue
-            cat_output_shape = input_node.meta["val"].shape
-            start_idx = (
-                cat_output_shape[cat_dim] + start_idx if start_idx < 0 else start_idx
-            )
-            end_idx = (
-                cat_output_shape[cat_dim]
-                if end_idx > cat_output_shape[cat_dim]
-                else end_idx
-            )
-            base_idx = 0
-            cat_input_to_keep = None
-            for cat_input_node in input_node.args[0]:
-                cat_input_dtype = cat_input_node.meta["val"].dtype
-                if slice_copy_dtype != cat_input_dtype:
-                    continue
+
+            # Canonicalize slice indices: None -> bounds, negative -> wrap, end clamped.
+            cat_output_shape = cat_node.meta["val"].shape
+            if start_idx is None:
+                start_idx = 0
+            elif start_idx < 0:
+                start_idx += cat_output_shape[cat_dim]
+            if end_idx is None or end_idx > cat_output_shape[cat_dim]:
+                end_idx = cat_output_shape[cat_dim]
+            elif end_idx < 0:
+                end_idx += cat_output_shape[cat_dim]
+
+            offset = 0
+            for cat_input_node in cast(List[Node], get_arg(cat_node, 0, "tensors")):
                 cat_input_shape = cat_input_node.meta["val"].shape
 
-                # check if the slice range overlaps with the cat range
-                if (
-                    base_idx <= start_idx
-                    and end_idx <= list(cat_input_shape)[cat_dim] + base_idx
-                ):
-                    cat_input_to_keep = cat_input_node
+                # Check if the slice range is fully contained in this cat input's range.
+                if offset <= start_idx and end_idx <= offset + cat_input_shape[cat_dim]:
+                    slice_copy_node.replace_input_with(cat_node, cat_input_node)
+                    set_arg(slice_copy_node, 2, "start", start_idx - offset)
+                    set_arg(slice_copy_node, 3, "end", end_idx - offset)
                     break
-                base_idx += list(cat_input_shape)[cat_dim]
-            if cat_input_to_keep is not None:
-                slice_copy_node.replace_input_with(input_node, cat_input_to_keep)
+
+                offset += cat_input_shape[cat_dim]
 
     def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
         self._remove_unused_cat(graph_module)
 
@@ -16,9 +16,10 @@
     CadencePassAttribute,
     register_cadence_pass,
 )
-
 from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.dialects.edge._ops import EdgeOpOverload
 from executorch.exir.pass_base import ExportPass, ProxyValue
+from torch.fx.operator_schemas import get_signature_for_torch_op
 
 
 @register_cadence_pass(CadencePassAttribute(opt_level=0))
@@ -109,8 +110,44 @@ def call_operator(self, op, args, kwargs, meta):
         return super().call_operator(op, new_args, kwargs, meta)
 
 
+@register_cadence_pass(CadencePassAttribute(opt_level=0))
+class BindOptionalArgsPass(ExportPass):
+    """Bind all optional args and kwargs."""
+
+    def call_operator(self, op, args, kwargs, meta):
+        if not isinstance(op, EdgeOpOverload):
+            return super().call_operator(op, args, kwargs, meta)
+        assert callable(op)
+
+        torch_op_schemas = get_signature_for_torch_op(op._op)
+        if len(torch_op_schemas) == 0:
+            return super().call_operator(op, args, kwargs, meta)
+
+        matched_schemas = []
+        # Try to bind the call's args/kwargs against every candidate schema and
+        # collect each schema that accepts them. Normalization only proceeds
+        # below when exactly one schema matches.
+        for candidate_signature in torch_op_schemas:
+            try:
+                candidate_signature.bind(*args, **kwargs)
+                matched_schemas.append(candidate_signature)
+            except TypeError:
+                continue
+
+        if len(matched_schemas) != 1:
+            # Zero or several schemas matched. Cannot normalize unambiguously
+            return super().call_operator(op, args, kwargs, meta)
+
+        sig = matched_schemas[0]
+        bound_args = sig.bind(*args, **kwargs)
+        bound_args.apply_defaults()
+
+        return super().call_operator(op, bound_args.args, bound_args.kwargs, meta)
+
+
 # This class encapsulates all the functions that simplify the op's args
 class CadenceSimplifyOpsInGraph:
     passes = [
         SimplifySliceOpPass,
+        BindOptionalArgsPass,
     ]
@@ -864,3 +864,30 @@ def forward(self, x, y):
 
         # Ensure both cat nodes were removed
         self.assertEqual(count_node(graph_module, exir_ops.edge.aten.cat.default), 0)
+
+    def test_remove_cat_from_slice_copy_second_input(self) -> None:
+        builder = GraphBuilder()
+        x = builder.placeholder("x", torch.randn(2, 4))
+        y = builder.placeholder("y", torch.randn(2, 4))
+        cat = builder.call_operator(
+            op=exir_ops.edge.aten.cat.default,
+            args=((x, y), 1),
+        )
+        slice_copy = builder.call_operator(
+            op=exir_ops.edge.aten.slice_copy.Tensor,
+            args=(cat, 1, 5, 7, 1),
+        )
+        builder.output([slice_copy])
+        graph_module = builder.get_graph_module()
+
+        inputs = (torch.randn(2, 4), torch.randn(2, 4))
+        expected_outputs = graph_module(*inputs)[0]
+
+        p = RemoveCatFromSliceCopyPass()
+        graph_module = cast(PassResult, p(graph_module)).graph_module
+
+        # Cat should be removed.
+        self.assertEqual(count_node(graph_module, exir_ops.edge.aten.cat.default), 0)
+
+        # Output should remain the same.
+        self.assertTrue(torch.equal(graph_module(*inputs)[0], expected_outputs))
@@ -13,8 +13,12 @@
 import executorch.backends.cadence.aot.ops_registrations  # noqa
 import torch
 from executorch.backends.cadence.aot.compiler import export_to_edge
+from executorch.backends.cadence.aot.graph_builder import single_op_builder
 from executorch.backends.cadence.aot.pass_utils import count_node
-from executorch.backends.cadence.aot.simplify_ops import SimplifySliceOpPass
+from executorch.backends.cadence.aot.simplify_ops import (
+    BindOptionalArgsPass,
+    SimplifySliceOpPass,
+)
 from executorch.exir.dialects._ops import ops as exir_ops
 from parameterized.parameterized import parameterized
 from torch.fx.passes.infra.pass_base import PassResult
@@ -112,3 +116,33 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         self.assertEqual(
             count_node(graph_after_passes, exir_ops.edge.aten.full.default), 1
         )
+
+    def test_simplify_slice_op_args(self) -> None:
+        x = torch.rand(4, 5)
+        gm = single_op_builder(
+            placeholders=(x,),
+            op=exir_ops.edge.aten.slice_copy.Tensor,
+            args=(x, 1),
+            kwargs={"end": 3},
+        )
+        self.assertEqual(
+            [
+                (n.args[1:], n.kwargs)
+                for n in gm.graph.find_nodes(
+                    op="call_function", target=exir_ops.edge.aten.slice_copy.Tensor
+                )
+            ],
+            [((1,), {"end": 3})],
+        )
+
+        gm = BindOptionalArgsPass().call(gm).graph_module
+
+        self.assertEqual(
+            [
+                (n.args[1:], n.kwargs)
+                for n in gm.graph.find_nodes(
+                    op="call_function", target=exir_ops.edge.aten.slice_copy.Tensor
+                )
+            ],
+            [((1, None, 3, 1), {})],
+        )