Skip to content

Commit 295e014

Browse files
authored
Merge branch 'main' into sum
2 parents fe01154 + ed91b6a commit 295e014

File tree

120 files changed

+1244
-824
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

120 files changed

+1244
-824
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ build_qnn_backend() {
1818
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
1919

2020
parallelism=$(( $(nproc) - 1 ))
21-
bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number ${parallelism} --release
21+
bash backends/qualcomm/scripts/build.sh --skip_linux_android --skip_linux_embedded --job_number ${parallelism} --release
2222
}
2323

2424
set_up_aot() {

.github/workflows/pull.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -909,6 +909,8 @@ jobs:
909909
910910
test-samsung-models-linux:
911911
name: test-samsung-models-linux
912+
# Skip this job if the pull request is from a fork (secrets are not available)
913+
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
912914
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
913915
permissions:
914916
id-token: write

.github/workflows/trunk.yml

Lines changed: 0 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1043,45 +1043,6 @@ jobs:
10431043
build-tool: cmake
10441044
docker-image: ci-image:executorch-ubuntu-22.04-clang12
10451045

1046-
test-mcu-models:
1047-
name: test-mcu-models
1048-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1049-
strategy:
1050-
matrix:
1051-
include:
1052-
- build-tool: cmake
1053-
fail-fast: false
1054-
permissions:
1055-
id-token: write
1056-
contents: read
1057-
with:
1058-
runner: linux.2xlarge
1059-
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
1060-
submodules: 'recursive'
1061-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1062-
timeout: 90
1063-
script: |
1064-
BUILD_TOOL=${{ matrix.build-tool }}
1065-
1066-
# The generic Linux job chooses to use base env, not the one setup by the image
1067-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1068-
conda activate "${CONDA_ENV}"
1069-
1070-
# Try to mirror these as closely as possible
1071-
source .ci/scripts/utils.sh
1072-
install_executorch "--use-pt-pinned-commit"
1073-
1074-
.ci/scripts/setup-arm-baremetal-tools.sh
1075-
source examples/arm/ethos-u-scratch/setup_path.sh
1076-
1077-
# Run selective Build
1078-
chmod +x examples/selective_build/test_selective_build.sh
1079-
examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
1080-
1081-
# Run MCU models
1082-
chmod +x examples/arm/run_mcu_models_fvp.sh
1083-
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
1084-
10851046
test-models-windows:
10861047
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
10871048
strategy:

CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,31 +49,31 @@
4949
/extension/export_util @kimishpatel
5050
/extension/flat_tensor @lucylq
5151
/extension/gguf_util @larryliu0820
52-
/extension/kernel_util @kimishpatel @manuelcandales @swolchok
53-
/extension/llm @jackzhxng @larryliu0820 @swolchok @mergennachin
54-
/extension/memory_allocator @JacobSzwejbka @swolchok
52+
/extension/kernel_util @kimishpatel @manuelcandales
53+
/extension/llm @jackzhxng @larryliu0820 @mergennachin
54+
/extension/memory_allocator @JacobSzwejbka
5555
/extension/module @shoumikhin
56-
/extension/parallel @kimishpatel @swolchok
56+
/extension/parallel @kimishpatel
5757
/extension/pybindings @JacobSzwejbka @larryliu0820
58-
/extension/pytree @JacobSzwejbka @swolchok
59-
/extension/runner_util @swolchok
58+
/extension/pytree @JacobSzwejbka
59+
/extension/runner_util
6060
/extension/tensor @shoumikhin
61-
/extension/testing_util @swolchok
62-
/extension/threadpool @kimishpatel @swolchok
61+
/extension/testing_util
62+
/extension/threadpool @kimishpatel
6363
/extension/training @JacobSzwejbka
6464

65-
/kernels @manuelcandales @swolchok
65+
/kernels @manuelcandales
6666

6767
/profiler @Gasoonjia
6868

69-
/runtime @JacobSzwejbka @lucylq @swolchok
69+
/runtime @JacobSzwejbka @lucylq
7070
/runtime/backend @cccclai
7171

7272
/schema @JacobSzwejbka @lucylq
7373

74-
/scripts @GregoryComer @swolchok
74+
/scripts @GregoryComer
7575

76-
/shim @larryliu0820 @GregoryComer @swolchok
76+
/shim @larryliu0820 @GregoryComer
7777

7878
/third-party @GregoryComer
7979

backends/aoti/aoti_partitioner.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Callable, Dict, List, Optional, Tuple
8+
9+
import torch
10+
from executorch.exir._warnings import experimental
11+
from executorch.exir.backend.compile_spec_schema import CompileSpec
12+
from executorch.exir.backend.partitioner import (
13+
DelegationSpec,
14+
Partitioner,
15+
PartitionResult,
16+
)
17+
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
18+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
19+
from torch.export.exported_program import ExportedProgram
20+
21+
22+
@experimental(
23+
"This API and all of cuda backend related functionality are experimental."
24+
)
25+
class AotiPartitioner(Partitioner):
26+
"""
27+
Base partitioner for AOTInductor-driven backend integration.
28+
29+
This partitioner creates a single partition containing all operators from the input graph.
30+
It skips core ATen decomposition, allowing the backend to handle decomposition using
31+
AOTInductor's backend-specific decomposition table.
32+
33+
Only operators that cannot be handled by the aoti library will be excluded from
34+
the partition and fall back to ExecuTorch's default or custom handling.
35+
"""
36+
37+
def __init__(self, backend_name: str, compile_spec: List[CompileSpec]) -> None:
38+
"""
39+
Initialize the AOTI partitioner.
40+
41+
Args:
42+
backend_name: The name of the backend (e.g., "CudaBackend", "MetalBackend")
43+
compile_spec: List of compilation specifications
44+
"""
45+
self.delegation_spec = DelegationSpec(backend_name, compile_spec)
46+
47+
def partition(self, exported_program: ExportedProgram) -> PartitionResult:
48+
"""
49+
Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.
50+
"""
51+
52+
partition_tags: Dict[str, DelegationSpec] = {}
53+
tag = "tag0"
54+
55+
for node in exported_program.graph.nodes:
56+
if node.op != "call_function":
57+
continue
58+
node.meta["delegation_tag"] = tag
59+
60+
partition_tags[tag] = self.delegation_spec
61+
62+
tag_constant_data(exported_program)
63+
tag_mutated_buffer(exported_program)
64+
65+
# Tag constant placeholders that have no users
66+
# tag_constant_data only tags constants that have users with delegation_tag
67+
# but we need to tag all constants for this partition
68+
for node in exported_program.graph.nodes:
69+
if node.op == "placeholder" and (
70+
is_param(exported_program, node)
71+
or is_buffer(exported_program, node)
72+
or is_lifted_tensor_constant(exported_program, node)
73+
):
74+
if "delegation_tag" not in node.meta:
75+
node.meta["delegation_tag"] = tag
76+
77+
return PartitionResult(
78+
tagged_exported_program=exported_program, partition_tags=partition_tags
79+
)
80+
81+
def ops_to_not_decompose(
82+
self, ep: ExportedProgram
83+
) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
84+
"""
85+
Return a list of operations that should not be decomposed and let the AOT compiler handle them.
86+
Currently we skip ATen decomposition for all ops, and let the backend handle them.
87+
"""
88+
do_not_decompose = set()
89+
90+
for node in ep.graph.nodes:
91+
if node.op == "call_function" and isinstance(
92+
node.target, torch._ops.OpOverload
93+
):
94+
do_not_decompose.add(node.target)
95+
return list(do_not_decompose), None

backends/aoti/common_shims.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ int32_t aoti_torch_dtype_int32() {
184184
return 3; // PyTorch's int32 dtype code
185185
}
186186

187+
int32_t aoti_torch_dtype_bool() {
188+
return 11; // PyTorch's bool dtype code
189+
}
190+
187191
int32_t aoti_torch_dtype_int64() {
188192
return 4; // PyTorch's int64 dtype code
189193
}

backends/aoti/common_shims.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ int32_t aoti_torch_dtype_int8();
6363
int32_t aoti_torch_dtype_int16();
6464
int32_t aoti_torch_dtype_int32();
6565
int32_t aoti_torch_dtype_int64();
66+
int32_t aoti_torch_dtype_bool();
6667

6768
// Dtype utility function needed by Metal backend
6869
size_t aoti_torch_dtype_element_size(int32_t dtype);

backends/aoti/targets.bzl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,21 @@
11
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
22

33
def define_common_targets():
4+
runtime.python_library(
5+
name = "aoti_partitioner",
6+
srcs = [
7+
"aoti_partitioner.py",
8+
],
9+
visibility = [
10+
"//executorch/...",
11+
],
12+
deps = [
13+
"//caffe2:torch",
14+
"//executorch/exir/backend:partitioner",
15+
"//executorch/exir/backend:utils",
16+
],
17+
)
18+
419
# AOTI common shims functionality
520
runtime.cxx_library(
621
name = "common_shims",

backends/aoti/tests/test_common_shims.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -322,3 +322,14 @@ TEST_F(CommonShimsTest, IndependentCaches) {
322322
// Sizes and strides pointers should be different (different caches)
323323
EXPECT_NE(sizes_ptr1, strides_ptr1);
324324
}
325+
326+
// Test all dtype functions return correct PyTorch dtype codes
327+
TEST_F(CommonShimsTest, AllDtypesReturnCorrectValues) {
328+
EXPECT_EQ(aoti_torch_dtype_float32(), 6); // PyTorch's float32 dtype code
329+
EXPECT_EQ(aoti_torch_dtype_bfloat16(), 15); // PyTorch's bfloat16 dtype code
330+
EXPECT_EQ(aoti_torch_dtype_int8(), 1); // PyTorch's int8 dtype code
331+
EXPECT_EQ(aoti_torch_dtype_int16(), 2); // PyTorch's int16 dtype code
332+
EXPECT_EQ(aoti_torch_dtype_int32(), 3); // PyTorch's int32 dtype code
333+
EXPECT_EQ(aoti_torch_dtype_int64(), 4); // PyTorch's int64 dtype code
334+
EXPECT_EQ(aoti_torch_dtype_bool(), 11); // PyTorch's bool dtype code
335+
}

backends/aoti/utils.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ inline executorch::aten::ScalarType dtype_to_scalar_type(int32_t dtype) {
4545
return executorch::aten::ScalarType::Long;
4646
case 6: // PyTorch's float32 dtype code
4747
return executorch::aten::ScalarType::Float;
48+
case 11: // PyTorch's bool dtype code
49+
return executorch::aten::ScalarType::Bool;
4850
case 15: // PyTorch's bfloat16 dtype code
4951
return executorch::aten::ScalarType::BFloat16;
5052
// Future support for additional dtypes can be added here

0 commit comments

Comments
 (0)