Skip to content

Commit d8bc294

Browse files
authored
Merge branch 'main' into patch-1
2 parents 153bd21 + ed91b6a commit d8bc294

File tree

135 files changed

+1517
-1013
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

135 files changed

+1517
-1013
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ build_qnn_backend() {
1818
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
1919

2020
parallelism=$(( $(nproc) - 1 ))
21-
bash backends/qualcomm/scripts/build.sh --skip_aarch64 --job_number ${parallelism} --release
21+
bash backends/qualcomm/scripts/build.sh --skip_linux_android --skip_linux_embedded --job_number ${parallelism} --release
2222
}
2323

2424
set_up_aot() {

.github/workflows/_unittest.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ jobs:
3232
id-token: write
3333
contents: read
3434
with:
35-
runner: linux.2xlarge
35+
runner: linux.2xlarge.memory
3636
docker-image: ${{ inputs.docker-image }}
3737
submodules: 'recursive'
3838
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

.github/workflows/cuda.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ jobs:
128128
echo "::endgroup::"
129129
130130
echo "::group::Setup Huggingface"
131-
pip install -U "huggingface_hub[cli]" accelerate
131+
pip install -U "huggingface_hub[cli]<1.0" accelerate
132132
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
133133
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
134134
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -208,7 +208,7 @@ jobs:
208208
echo "::endgroup::"
209209
210210
echo "::group::Setup Huggingface"
211-
pip install -U "huggingface_hub[cli]" accelerate
211+
pip install -U "huggingface_hub[cli]<1.0" accelerate
212212
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
213213
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
214214
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}

.github/workflows/metal.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ jobs:
3030
3131
export-voxtral-metal-artifact:
3232
name: export-voxtral-metal-artifact
33+
# Skip this job if the pull request is from a fork (HuggingFace secrets are not available)
34+
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
3335
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
3436
secrets: inherit
3537
with:
@@ -44,7 +46,7 @@ jobs:
4446
set -eux
4547
4648
echo "::group::Setup Huggingface"
47-
${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
49+
${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
4850
${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
4951
echo "::endgroup::"
5052

.github/workflows/pull.yml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ jobs:
315315
echo "::endgroup::"
316316
317317
echo "::group::Setup Huggingface"
318-
pip install -U "huggingface_hub[cli]" accelerate
318+
pip install -U "huggingface_hub[cli]<1.0" accelerate
319319
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
320320
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
321321
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -636,7 +636,7 @@ jobs:
636636
echo "::group::Setup ExecuTorch"
637637
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
638638
echo "::endgroup::"
639-
639+
640640
echo "::group::Setup requirements"
641641
# install phi-3-mini requirements
642642
bash examples/models/phi-3-mini/install_requirements.sh
@@ -909,6 +909,8 @@ jobs:
909909
910910
test-samsung-models-linux:
911911
name: test-samsung-models-linux
912+
# Skip this job if the pull request is from a fork (secrets are not available)
913+
if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request'
912914
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
913915
permissions:
914916
id-token: write

.github/workflows/trunk.yml

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,7 @@ jobs:
626626
BUILD_TORCHAO_EXPERIMENTAL=1 TORCHAO_BUILD_CPU_AARCH64=1 TORCHAO_BUILD_KLEIDIAI=1 TORCHAO_ENABLE_ARM_NEON_DOT=1 TORCHAO_PARALLEL_BACKEND=OPENMP pip install third-party/ao
627627
fi
628628
629-
pip install -U "huggingface_hub[cli]"
629+
pip install -U "huggingface_hub[cli]<1.0"
630630
631631
bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.model != 'phi_4_mini' && '--test_with_runner' || '' }} ${{ matrix.backend == 'torchao' && '--use_torchao_kernels' || '' }}
632632
@@ -659,7 +659,7 @@ jobs:
659659
echo "::endgroup::"
660660
661661
echo "::group::Set up Huggingface"
662-
${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
662+
${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
663663
${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
664664
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
665665
${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -834,7 +834,7 @@ jobs:
834834
echo "::endgroup::"
835835
836836
echo "::group::Setup Huggingface"
837-
pip install -U "huggingface_hub[cli]" accelerate
837+
pip install -U "huggingface_hub[cli]<1.0" accelerate
838838
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
839839
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
840840
pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -930,7 +930,7 @@ jobs:
930930
echo "::endgroup::"
931931
932932
echo "::group::Set up Huggingface"
933-
${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
933+
${CONDA_RUN} pip install -U "huggingface_hub[cli]<1.0" accelerate
934934
${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
935935
OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
936936
${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
@@ -1043,45 +1043,6 @@ jobs:
10431043
build-tool: cmake
10441044
docker-image: ci-image:executorch-ubuntu-22.04-clang12
10451045

1046-
test-mcu-models:
1047-
name: test-mcu-models
1048-
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
1049-
strategy:
1050-
matrix:
1051-
include:
1052-
- build-tool: cmake
1053-
fail-fast: false
1054-
permissions:
1055-
id-token: write
1056-
contents: read
1057-
with:
1058-
runner: linux.2xlarge
1059-
docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
1060-
submodules: 'recursive'
1061-
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1062-
timeout: 90
1063-
script: |
1064-
BUILD_TOOL=${{ matrix.build-tool }}
1065-
1066-
# The generic Linux job chooses to use base env, not the one setup by the image
1067-
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1068-
conda activate "${CONDA_ENV}"
1069-
1070-
# Try to mirror these as closely as possible
1071-
source .ci/scripts/utils.sh
1072-
install_executorch "--use-pt-pinned-commit"
1073-
1074-
.ci/scripts/setup-arm-baremetal-tools.sh
1075-
source examples/arm/ethos-u-scratch/setup_path.sh
1076-
1077-
# Run selective Build
1078-
chmod +x examples/selective_build/test_selective_build.sh
1079-
examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
1080-
1081-
# Run MCU models
1082-
chmod +x examples/arm/run_mcu_models_fvp.sh
1083-
examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
1084-
10851046
test-models-windows:
10861047
uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
10871048
strategy:

CODEOWNERS

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,31 +49,31 @@
4949
/extension/export_util @kimishpatel
5050
/extension/flat_tensor @lucylq
5151
/extension/gguf_util @larryliu0820
52-
/extension/kernel_util @kimishpatel @manuelcandales @swolchok
53-
/extension/llm @jackzhxng @larryliu0820 @swolchok @mergennachin
54-
/extension/memory_allocator @JacobSzwejbka @swolchok
52+
/extension/kernel_util @kimishpatel @manuelcandales
53+
/extension/llm @jackzhxng @larryliu0820 @mergennachin
54+
/extension/memory_allocator @JacobSzwejbka
5555
/extension/module @shoumikhin
56-
/extension/parallel @kimishpatel @swolchok
56+
/extension/parallel @kimishpatel
5757
/extension/pybindings @JacobSzwejbka @larryliu0820
58-
/extension/pytree @JacobSzwejbka @swolchok
59-
/extension/runner_util @swolchok
58+
/extension/pytree @JacobSzwejbka
59+
/extension/runner_util
6060
/extension/tensor @shoumikhin
61-
/extension/testing_util @swolchok
62-
/extension/threadpool @kimishpatel @swolchok
61+
/extension/testing_util
62+
/extension/threadpool @kimishpatel
6363
/extension/training @JacobSzwejbka
6464

65-
/kernels @manuelcandales @swolchok
65+
/kernels @manuelcandales
6666

6767
/profiler @Gasoonjia
6868

69-
/runtime @JacobSzwejbka @lucylq @swolchok
69+
/runtime @JacobSzwejbka @lucylq
7070
/runtime/backend @cccclai
7171

7272
/schema @JacobSzwejbka @lucylq
7373

74-
/scripts @GregoryComer @swolchok
74+
/scripts @GregoryComer
7575

76-
/shim @larryliu0820 @GregoryComer @swolchok
76+
/shim @larryliu0820 @GregoryComer
7777

7878
/third-party @GregoryComer
7979

backends/aoti/aoti_partitioner.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
from typing import Callable, Dict, List, Optional, Tuple
8+
9+
import torch
10+
from executorch.exir._warnings import experimental
11+
from executorch.exir.backend.compile_spec_schema import CompileSpec
12+
from executorch.exir.backend.partitioner import (
13+
DelegationSpec,
14+
Partitioner,
15+
PartitionResult,
16+
)
17+
from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
18+
from torch._export.utils import is_buffer, is_lifted_tensor_constant, is_param
19+
from torch.export.exported_program import ExportedProgram
20+
21+
22+
@experimental(
    "This API and all of cuda backend related functionality are experimental."
)
class AotiPartitioner(Partitioner):
    """
    Base partitioner for AOTInductor-driven backend integration.

    The whole input graph is delegated as a single partition: every
    ``call_function`` node receives the same delegation tag. No per-operator
    support check is performed here, so nothing is left outside the partition
    by this class.

    Core ATen decomposition is skipped for every operator found in the graph
    (see ``ops_to_not_decompose``), leaving decomposition to the backend.
    """

    def __init__(self, backend_name: str, compile_spec: List[CompileSpec]) -> None:
        """
        Initialize the AOTI partitioner.

        Args:
            backend_name: The name of the backend (e.g., "CudaBackend", "MetalBackend")
            compile_spec: List of compilation specifications
        """
        self.delegation_spec = DelegationSpec(backend_name, compile_spec)

    def partition(self, exported_program: ExportedProgram) -> PartitionResult:
        """
        Fully delegate the graph to AOTInductor by tagging all nodes as a single partition.

        Args:
            exported_program: The program whose graph nodes are tagged in place.

        Returns:
            A PartitionResult wrapping the same (now tagged) program and a
            one-entry mapping from the tag to this partitioner's delegation spec.
        """
        tag = "tag0"
        partition_tags: Dict[str, DelegationSpec] = {}

        # Every operator call goes into the one and only partition.
        for node in exported_program.graph.nodes:
            if node.op == "call_function":
                node.meta["delegation_tag"] = tag

        # The tag is registered unconditionally, as in the original code.
        partition_tags[tag] = self.delegation_spec

        tag_constant_data(exported_program)
        tag_mutated_buffer(exported_program)

        # Tag constant placeholders that have no users.
        # tag_constant_data only tags constants that have users with a
        # delegation_tag, but all constants must belong to this partition.
        for node in exported_program.graph.nodes:
            if node.op != "placeholder" or "delegation_tag" in node.meta:
                continue
            if (
                is_param(exported_program, node)
                or is_buffer(exported_program, node)
                or is_lifted_tensor_constant(exported_program, node)
            ):
                node.meta["delegation_tag"] = tag

        return PartitionResult(
            tagged_exported_program=exported_program, partition_tags=partition_tags
        )

    def ops_to_not_decompose(
        self, ep: ExportedProgram
    ) -> Tuple[List[torch._ops.OpOverload], Optional[Callable[[torch.fx.Node], bool]]]:
        """
        Return a list of operations that should not be decomposed and let the AOT compiler handle them.
        Currently we skip ATen decomposition for all ops, and let the backend handle them.

        Args:
            ep: The exported program whose graph is scanned for operator overloads.

        Returns:
            A tuple of (unique OpOverload targets in first-seen graph order, None).
            The second element is the optional per-node filter; none is supplied.
        """
        # dict.fromkeys de-duplicates while preserving insertion order, so the
        # returned list is reproducible (iterating a set gives arbitrary order).
        do_not_decompose = dict.fromkeys(
            node.target
            for node in ep.graph.nodes
            if node.op == "call_function"
            and isinstance(node.target, torch._ops.OpOverload)
        )
        return list(do_not_decompose), None

backends/aoti/common_shims.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ int32_t aoti_torch_dtype_int32() {
184184
return 3; // PyTorch's int32 dtype code
185185
}
186186

187+
int32_t aoti_torch_dtype_bool() {
188+
return 11; // PyTorch's bool dtype code
189+
}
190+
187191
int32_t aoti_torch_dtype_int64() {
188192
return 4; // PyTorch's int64 dtype code
189193
}

backends/aoti/common_shims.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ int32_t aoti_torch_dtype_int8();
6363
int32_t aoti_torch_dtype_int16();
6464
int32_t aoti_torch_dtype_int32();
6565
int32_t aoti_torch_dtype_int64();
66+
int32_t aoti_torch_dtype_bool();
6667

6768
// Dtype utility function needed by Metal backend
6869
size_t aoti_torch_dtype_element_size(int32_t dtype);

0 commit comments

Comments
 (0)