Skip to content

Commit 273689a

Browse files
authored
Merge branch 'main' into ph-mypy-ops-test-misc
2 parents dbab1cd + 6abe901 commit 273689a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+825
-701
lines changed

.github/workflows/trunk.yml

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,45 +1043,6 @@ jobs:
10431043
build-tool: cmake
10441044
docker-image: ci-image:executorch-ubuntu-22.04-clang12
10451045

1046-
test-mcu-models:
1047-
name: test-mcu-models
1048-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1049-
strategy:
1050-
matrix:
1051-
include:
1052-
- build-tool: cmake
1053-
fail-fast: false
1054-
permissions:
1055-
id-token: write
1056-
contents: read
1057-
with:
1058-
runner: linux.2xlarge
1059-
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
1060-
submodules: 'recursive'
1061-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1062-
timeout: 90
1063-
script: |
1064-
BUILD_TOOL=${{ matrix.build-tool }}
1065-
1066-
# The generic Linux job chooses to use base env, not the one setup by the image
1067-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1068-
conda activate "${CONDA_ENV}"
1069-
1070-
# Try to mirror these as closely as possible
1071-
source .ci/scripts/utils.sh
1072-
install_executorch "--use-pt-pinned-commit"
1073-
1074-
.ci/scripts/setup-arm-baremetal-tools.sh
1075-
source examples/arm/ethos-u-scratch/setup_path.sh
1076-
1077-
# Run selective Build
1078-
chmod +x examples/selective_build/test_selective_build.sh
1079-
examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
1080-
1081-
# Run MCU models
1082-
chmod +x examples/arm/run_mcu_models_fvp.sh
1083-
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
1084-
10851046
test-models-windows:
10861047
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
10871048
strategy:

CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,31 +49,31 @@
4949
/extension/export_util @kimishpatel
5050
/extension/flat_tensor @lucylq
5151
/extension/gguf_util @larryliu0820
52-
/extension/kernel_util @kimishpatel @manuelcandales @swolchok
53-
/extension/llm @jackzhxng @larryliu0820 @swolchok @mergennachin
54-
/extension/memory_allocator @JacobSzwejbka @swolchok
52+
/extension/kernel_util @kimishpatel @manuelcandales
53+
/extension/llm @jackzhxng @larryliu0820 @mergennachin
54+
/extension/memory_allocator @JacobSzwejbka
5555
/extension/module @shoumikhin
56-
/extension/parallel @kimishpatel @swolchok
56+
/extension/parallel @kimishpatel
5757
/extension/pybindings @JacobSzwejbka @larryliu0820
58-
/extension/pytree @JacobSzwejbka @swolchok
59-
/extension/runner_util @swolchok
58+
/extension/pytree @JacobSzwejbka
59+
/extension/runner_util
6060
/extension/tensor @shoumikhin
61-
/extension/testing_util @swolchok
62-
/extension/threadpool @kimishpatel @swolchok
61+
/extension/testing_util
62+
/extension/threadpool @kimishpatel
6363
/extension/training @JacobSzwejbka
6464

65-
/kernels @manuelcandales @swolchok
65+
/kernels @manuelcandales
6666

6767
/profiler @Gasoonjia
6868

69-
/runtime @JacobSzwejbka @lucylq @swolchok
69+
/runtime @JacobSzwejbka @lucylq
7070
/runtime/backend @cccclai
7171

7272
/schema @JacobSzwejbka @lucylq
7373

74-
/scripts @GregoryComer @swolchok
74+
/scripts @GregoryComer
7575

76-
/shim @larryliu0820 @GregoryComer @swolchok
76+
/shim @larryliu0820 @GregoryComer
7777

7878
/third-party @GregoryComer
7979

backends/aoti/aoti_partitioner.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Callable, Dict, List, Optional, Tuple
8+
9+
import torch
10+
from executorch.exir._warnings import experimental
11+
from executorch.exir.backend.compile_spec_schema import CompileSpec
12+
from executorch.exir.backend.partitioner import (
13+
DelegationSpec,
14+
Partitioner,
15+
PartitionResult,
16+
)
17+
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
18+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
19+
from torch.export.exported_program import ExportedProgram
20+
21+
22+
@experimental(
    "This API and all of AOTI backend related functionality are experimental."
)
class AotiPartitioner(Partitioner):
    """
    Base partitioner for AOTInductor-driven backend integration.

    Every ``call_function`` node of the input graph is tagged with the same
    delegation tag, so the whole graph becomes a single partition for the
    backend named at construction time. ``ops_to_not_decompose`` reports every
    ``OpOverload`` target found in the graph, so decomposition of those ops is
    left to the backend.

    NOTE(review): no per-operator support check is performed in this class;
    any fallback for unsupported operators has to happen elsewhere - confirm.
    """

    def __init__(self, backend_name: str, compile_spec: List[CompileSpec]) -> None:
        """
        Initialize the AOTI partitioner.

        Args:
            backend_name: The name of the backend (e.g., "CudaBackend", "MetalBackend")
            compile_spec: List of compilation specifications
        """
        self.delegation_spec = DelegationSpec(backend_name, compile_spec)

    def partition(self, exported_program: ExportedProgram) -> PartitionResult:
        """
        Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.

        Args:
            exported_program: Program whose graph nodes are tagged in place
                (via ``node.meta["delegation_tag"]``).

        Returns:
            A PartitionResult wrapping the same (now tagged) program and a
            one-entry mapping from the tag to this partitioner's delegation spec.
        """
        tag = "tag0"

        # Every operator call goes into the one and only partition.
        for node in exported_program.graph.nodes:
            if node.op == "call_function":
                node.meta["delegation_tag"] = tag

        tag_constant_data(exported_program)
        tag_mutated_buffer(exported_program)

        # Tag constant placeholders that have no users:
        # tag_constant_data only tags constants that have users with delegation_tag,
        # but we need to tag all constants for this partition.
        for node in exported_program.graph.nodes:
            if node.op != "placeholder" or "delegation_tag" in node.meta:
                continue
            if (
                is_param(exported_program, node)
                or is_buffer(exported_program, node)
                or is_lifted_tensor_constant(exported_program, node)
            ):
                node.meta["delegation_tag"] = tag

        return PartitionResult(
            tagged_exported_program=exported_program,
            partition_tags={tag: self.delegation_spec},
        )

    def ops_to_not_decompose(
        self, ep: ExportedProgram
    ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
        """
        Return a list of operations that should not be decomposed and let the AOT compiler handle them.
        Currently we skip ATen decomposition for all ops, and let the backend handle them.

        Args:
            ep: Program whose graph is scanned for ``OpOverload`` call targets.

        Returns:
            A tuple of (unique ``OpOverload`` targets in first-seen graph order,
            ``None``); no per-node filter callable is supplied.
        """
        # dict.fromkeys de-duplicates like a set but keeps insertion order, so
        # the returned list is stable across runs instead of hash-ordered.
        do_not_decompose = dict.fromkeys(
            node.target
            for node in ep.graph.nodes
            if node.op == "call_function"
            and isinstance(node.target, torch._ops.OpOverload)
        )
        return list(do_not_decompose), None

backends/aoti/targets.bzl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
22

33
def define_common_targets():
4+
runtime.python_library(
5+
name = "aoti_partitioner",
6+
srcs = [
7+
"aoti_partitioner.py",
8+
],
9+
visibility = [
10+
"//executorch/...",
11+
],
12+
deps = [
13+
"//caffe2:torch",
14+
"//executorch/exir/backend:partitioner",
15+
"//executorch/exir/backend:utils",
16+
],
17+
)
18+
419
# AOTI common shims functionality
520
runtime.cxx_library(
621
name = "common_shims",

backends/apple/metal/metal_partitioner.py

Lines changed: 5 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4,74 +4,22 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7-
from typing import Callable, Dict, final, List, Optional, Tuple
7+
from typing import final, List
88

9-
import torch
9+
from executorch.backends.aoti.aoti_partitioner import AotiPartitioner
1010
from executorch.backends.apple.metal.metal_backend import MetalBackend # usort: skip
1111
from executorch.exir._warnings import experimental
1212
from executorch.exir.backend.compile_spec_schema import CompileSpec
13-
from executorch.exir.backend.partitioner import (
14-
DelegationSpec,
15-
Partitioner,
16-
PartitionResult,
17-
)
18-
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
19-
from torch.export.exported_program import ExportedProgram
2013

2114

2215
@final
2316
@experimental(
2417
"This API and all of Metal backend related functionality are experimental."
2518
)
26-
class MetalPartitioner(Partitioner):
19+
class MetalPartitioner(AotiPartitioner):
2720
"""
28-
Metal partitioner for AOTInductor backend integration.
29-
30-
This partitioner creates a single partition containing all operators from the input graph.
31-
It skips core ATen decomposition, allowing the Metal backend to handle decomposition using
32-
AOTInductor's MPS-specific decomposition table.
33-
34-
Only operators that cannot be handled by the aoti-mps library will be excluded from
35-
the partition and fall back to ExecuTorch's default or custom handling.
21+
Metal partitioner driven by AOTInductor backend.
3622
"""
3723

3824
def __init__(self, compile_spec: List[CompileSpec]) -> None:
39-
self.delegation_spec = DelegationSpec(MetalBackend.__name__, compile_spec)
40-
41-
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
42-
"""
43-
Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.
44-
"""
45-
46-
partition_tags: Dict[str, DelegationSpec] = {}
47-
tag = "tag0"
48-
49-
for node in exported_program.graph.nodes:
50-
if node.op != "call_function":
51-
continue
52-
node.meta["delegation_tag"] = tag
53-
54-
partition_tags[tag] = self.delegation_spec
55-
56-
tag_constant_data(exported_program)
57-
tag_mutated_buffer(exported_program)
58-
59-
return PartitionResult(
60-
tagged_exported_program=exported_program, partition_tags=partition_tags
61-
)
62-
63-
def ops_to_not_decompose(
64-
self, ep: ExportedProgram
65-
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
66-
"""
67-
Return a list of operations that should not be decomposed and let the AOT compiler handle them.
68-
Currently we skip ATen decompositon for all ops, and let the Metal backend handle them.
69-
"""
70-
do_not_decompose = set()
71-
72-
for node in ep.graph.nodes:
73-
if node.op == "call_function" and isinstance(
74-
node.target, torch._ops.OpOverload
75-
):
76-
do_not_decompose.add(node.target)
77-
return list(do_not_decompose), None
25+
super().__init__(MetalBackend.__name__, compile_spec)

backends/arm/test/ops/test_slice.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,13 +76,7 @@ def test_slice_tensor_tosa_INT_nhwc(test_data: torch.Tensor):
7676
pipeline.run()
7777

7878

79-
x_fails = {
80-
"ones_slice_3": "MLETORCH-1402: Compiler limitation when passing more than 255 char as argument to FVP.",
81-
"ones_slice_4": "MLETORCH-1402: Compiler limitation when passing more than 255 char as argument to FVP.",
82-
}
83-
84-
85-
@common.parametrize("test_data", test_data_suite, x_fails)
79+
@common.parametrize("test_data", test_data_suite)
8680
@common.XfailIfNoCorstone300
8781
def test_slice_tensor_u55_INT(test_data: torch.Tensor):
8882
pipeline = EthosU55PipelineINT[input_t1](
@@ -94,7 +88,7 @@ def test_slice_tensor_u55_INT(test_data: torch.Tensor):
9488
pipeline.run()
9589

9690

97-
@common.parametrize("test_data", test_data_suite, x_fails)
91+
@common.parametrize("test_data", test_data_suite)
9892
@common.XfailIfNoCorstone320
9993
def test_slice_tensor_u85_INT(test_data: torch.Tensor):
10094
pipeline = EthosU85PipelineINT[input_t1](
@@ -175,7 +169,7 @@ def test_slice_tensor_16a8w_tosa_INT(test_data: torch.Tensor):
175169
pipeline.run()
176170

177171

178-
@common.parametrize("test_data", test_data_suite, x_fails)
172+
@common.parametrize("test_data", test_data_suite)
179173
@common.XfailIfNoCorstone300
180174
def test_slice_tensor_16a8w_u55_INT16(test_data: torch.Tensor):
181175
"""Test slice operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
@@ -199,7 +193,7 @@ def test_slice_tensor_16a8w_u55_INT16(test_data: torch.Tensor):
199193
pipeline.run()
200194

201195

202-
@common.parametrize("test_data", test_data_suite, x_fails)
196+
@common.parametrize("test_data", test_data_suite)
203197
@common.XfailIfNoCorstone320
204198
def test_slice_tensor_16a8w_u85_INT16(test_data: torch.Tensor):
205199
"""Test slice operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""

backends/arm/test/ops/test_split.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -139,17 +139,7 @@ def test_split_with_sizes_tosa_INT(test_data: input_t1):
139139
pipeline.run()
140140

141141

142-
x_fails = {
143-
"split_3d_2_sizes_dim": "MLETORCH-1403: Split operator is running out of memory when reading input file",
144-
"split_4d_2_sizes_dim_neg": "MLETORCH-1403: Split operator is running out of memory when reading input file",
145-
}
146-
147-
148-
@common.parametrize(
149-
"test_data",
150-
(Split.test_data | Split.test_data_list),
151-
x_fails,
152-
)
142+
@common.parametrize("test_data", (Split.test_data | Split.test_data_list))
153143
@common.XfailIfNoCorstone300
154144
def test_split_with_sizes_u55_INT(test_data: input_t1):
155145
pipeline = EthosU55PipelineINT[input_t1](
@@ -161,11 +151,7 @@ def test_split_with_sizes_u55_INT(test_data: input_t1):
161151
pipeline.run()
162152

163153

164-
@common.parametrize(
165-
"test_data",
166-
(Split.test_data | Split.test_data_list),
167-
x_fails,
168-
)
154+
@common.parametrize("test_data", (Split.test_data | Split.test_data_list))
169155
@common.XfailIfNoCorstone320
170156
def test_split_with_sizes_u85_INT(test_data: input_t1):
171157
pipeline = EthosU85PipelineINT[input_t1](

0 commit comments

Comments
 (0)