# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Callable, Dict, List, Optional, Tuple

import torch
from executorch.exir._warnings import experimental
from executorch.exir.backend.compile_spec_schema import CompileSpec
from executorch.exir.backend.partitioner import (
    DelegationSpec,
    Partitioner,
    PartitionResult,
)
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
from torch.export.exported_program import ExportedProgram


@experimental(
    "This API and all CUDA-backend-related functionality are experimental."
)
class AotiPartitioner(Partitioner):
    """
    Base partitioner for AOTInductor-driven backend integration.

    This partitioner creates a single partition containing all operators from the
    input graph. It skips core ATen decomposition, allowing the backend to handle
    decomposition using AOTInductor's backend-specific decomposition table.

    Only operators that the AOTInductor library cannot handle are excluded from
    the partition; they fall back to ExecuTorch's default or custom handling.
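
    Example (hypothetical usage; the backend name, the empty compile-spec list,
    and the ``to_backend`` lowering entry point are illustrative assumptions,
    not a confirmed API of this class):

        from executorch.exir.backend.backend_api import to_backend

        # exported_program: an ExportedProgram, e.g. from torch.export.export()
        partitioner = AotiPartitioner("CudaBackend", [])
        lowered = to_backend(exported_program, partitioner)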
    """

    def __init__(self, backend_name: str, compile_spec: List[CompileSpec]) -> None:
        """
        Initialize the AOTI partitioner.

        Args:
            backend_name: The name of the backend (e.g., "CudaBackend", "MetalBackend")
            compile_spec: List of compilation specifications
        """
        self.delegation_spec = DelegationSpec(backend_name, compile_spec)

    def partition(self, exported_program: ExportedProgram) -> PartitionResult:
        """
        Fully delegate the graph to AOTInductor by tagging all nodes as a single
        partition.
        """

        partition_tags: Dict[str, DelegationSpec] = {}
        tag = "tag0"

        # Tag every call_function node so the whole graph forms one partition.
        for node in exported_program.graph.nodes:
            if node.op != "call_function":
                continue
            node.meta["delegation_tag"] = tag

        partition_tags[tag] = self.delegation_spec

        tag_constant_data(exported_program)
        tag_mutated_buffer(exported_program)

        # Tag any remaining constant placeholders. tag_constant_data only tags
        # constants whose users carry a delegation_tag, but this whole-graph
        # partition needs every constant tagged.
        for node in exported_program.graph.nodes:
            if node.op == "placeholder" and (
                is_param(exported_program, node)
                or is_buffer(exported_program, node)
                or is_lifted_tensor_constant(exported_program, node)
            ):
                if "delegation_tag" not in node.meta:
                    node.meta["delegation_tag"] = tag

        return PartitionResult(
            tagged_exported_program=exported_program, partition_tags=partition_tags
        )

    def ops_to_not_decompose(
        self, ep: ExportedProgram
    ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
        """
        Return the list of operations that should not be decomposed, leaving them
        for the AOT compiler to handle. Currently we skip ATen decomposition for
        all ops in the graph and let the backend handle them.
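
        For example, if the graph contains calls to ``aten.add.Tensor`` and
        ``aten.mm.default``, this returns
        ``([torch.ops.aten.add.Tensor, torch.ops.aten.mm.default], None)``; the
        ``None`` in place of a filter callable means the exclusion applies to
        every matching node. (The ops named here are illustrative; the actual
        list depends on the graph.)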
        """
        do_not_decompose = set()

        for node in ep.graph.nodes:
            if node.op == "call_function" and isinstance(
                node.target, torch._ops.OpOverload
            ):
                do_not_decompose.add(node.target)
        return list(do_not_decompose), None