Skip to content

Commit 04ef9e0

Browse files
author
pytorchbot
committed
2025-05-14 nightly release (9ded0a2)
1 parent 4882b9c commit 04ef9e0

File tree

79 files changed

+2506
-1146
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

79 files changed

+2506
-1146
lines changed

.ci/scripts/test_model.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ test_model_with_qnn() {
201201
# TODO(guangyang): Make the QNN chipset match the target device
202202
QNN_CHIPSET=SM8450
203203

204-
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
204+
"${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
205205
EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
206206
}
207207

CMakeLists.txt

Lines changed: 16 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ project(executorch)
4949

5050
include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
5151

52+
if(NOT CMAKE_CXX_STANDARD)
53+
set(CMAKE_CXX_STANDARD 17)
54+
endif()
55+
announce_configured_options(CMAKE_CXX_STANDARD)
56+
57+
if(NOT CMAKE_BUILD_TYPE)
58+
set(CMAKE_BUILD_TYPE Debug)
59+
endif()
60+
announce_configured_options(CMAKE_BUILD_TYPE)
61+
62+
announce_configured_options(CMAKE_CXX_COMPILER_ID)
63+
announce_configured_options(CMAKE_TOOLCHAIN_FILE)
64+
announce_configured_options(BUCK2)
65+
announce_configured_options(PYTHON_EXECUTABLE)
66+
5267
load_build_preset()
5368
include(${PROJECT_SOURCE_DIR}/tools/cmake/preset/default.cmake)
5469

@@ -63,14 +78,6 @@ include(ExternalProject)
6378

6479
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
6580

66-
if(NOT CMAKE_CXX_STANDARD)
67-
set(CMAKE_CXX_STANDARD 17)
68-
endif()
69-
70-
if(NOT CMAKE_BUILD_TYPE)
71-
set(CMAKE_BUILD_TYPE Debug)
72-
endif()
73-
7481
# Setup RPATH.
7582
# See https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling
7683
# Use separate rpaths during build and install phases
@@ -128,22 +135,6 @@ else()
128135
set(CMAKE_CXX_FLAGS_RELEASE "-O2 ${CMAKE_CXX_FLAGS_RELEASE}")
129136
endif()
130137

131-
#
132-
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
133-
#
134-
cmake_dependent_option(
135-
EXECUTORCH_BUILD_PTHREADPOOL "Build pthreadpool library." ON
136-
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
137-
)
138-
139-
#
140-
# cpuinfo: build cpuinfo library. Disable on unsupported platforms
141-
#
142-
cmake_dependent_option(
143-
EXECUTORCH_BUILD_CPUINFO "Build cpuinfo library." ON
144-
"NOT EXECUTORCH_BUILD_ARM_BAREMETAL" OFF
145-
)
146-
147138
add_subdirectory(third-party)
148139

149140
if(EXECUTORCH_BUILD_EXTENSION_TRAINING)
@@ -475,14 +466,6 @@ install(
475466
)
476467
install(FILES tools/cmake/executorch-config.cmake DESTINATION lib/cmake/ExecuTorch)
477468

478-
#
479-
# executor_runner: Host tool that demonstrates program execution.
480-
#
481-
cmake_dependent_option(
482-
EXECUTORCH_BUILD_EXECUTOR_RUNNER "Build the executor_runner executable" ON
483-
"NOT CMAKE_TOOLCHAIN_IOS" OFF
484-
)
485-
486469
# Add googletest if any test targets should be built
487470
if(BUILD_TESTING)
488471
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/googletest)
@@ -571,9 +554,7 @@ if(EXECUTORCH_BUILD_EXTENSION_TENSOR)
571554
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/tensor)
572555
endif()
573556

574-
if(EXECUTORCH_BUILD_PTHREADPOOL
575-
AND EXECUTORCH_BUILD_CPUINFO
576-
)
557+
if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
577558
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
578559
endif()
579560

@@ -738,6 +719,3 @@ if(EXECUTORCH_BUILD_VULKAN)
738719
endif()
739720

740721
include(Test.cmake)
741-
742-
# Print all summary
743-
executorch_print_configuration_summary()

CODEOWNERS

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,6 @@
1515
/backends/vulkan @SS-JIA
1616
/backends/xnnpack @digantdesai @mcr229
1717

18-
/build @GregoryComer @kirklandsign
19-
20-
/codegen @larryliu0820 @lucylq
21-
2218
/devtools @tarun292 @Gasoonjia
2319

2420
/docs @mergennachin
@@ -41,7 +37,6 @@
4137
/exir/backend @cccclai @kimishpatel @JacobSzwejbka @tarun292
4238
/exir @JacobSzwejbka @tarun292 @larryliu0820
4339

44-
4540
/extension/android @kirklandsign
4641
/extension/android_test @kirklandsign
4742
/extension/apple @shoumikhin
@@ -83,3 +78,11 @@
8378
/test @larryliu0820 @kirklandsign
8479

8580
/util @tarun292
81+
82+
# Build System -----------------------------------------------------------------
83+
84+
CMakeLists.txt @jathu @larryliu0820 @kirklandsign
85+
CMakePresets.json @jathu @larryliu0820 @kirklandsign
86+
87+
/codegen @larryliu0820 @lucylq
88+
/tools/cmake @jathu @larryliu0820 @kirklandsign

backends/arm/_passes/TARGETS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ python_library(
77
"//executorch/backends/arm:tosa_quant_utils",
88
"//executorch/backends/arm:tosa_utils",
99
"//executorch/backends/transforms:fuse_view_copy",
10+
"//executorch/backends/transforms:remove_getitem_op",
1011
"//executorch/backends/transforms:replace_scalar_with_tensor",
1112
"//executorch/backends/xnnpack/_passes:xnnpack_passes",
1213
"//executorch/exir:lib",

backends/arm/_passes/__init__.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from .decompose_softmax_pass import DecomposeSoftmaxPass # noqa
3333
from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass # noqa
3434
from .decompose_sqrt_pass import DecomposeSqrtPass # noqa
35+
from .decompose_sum_pass import DecomposeSumPass # noqa
3536
from .decompose_var_pass import DecomposeVarPass # noqa
3637
from .fold_qdq_with_annotated_qparams_pass import ( # noqa
3738
FoldAndAnnotateQParamsPass,
@@ -44,10 +45,8 @@
4445
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
4546
from .insert_rescales_pass import InsertRescalePass # noqa
4647
from .insert_table_ops import InsertTableOpsPass # noqa
47-
from .keep_dims_false_to_squeeze_pass import KeepDimsFalseToSqueezePass # noqa
4848
from .match_arg_ranks_pass import MatchArgRanksPass # noqa
4949
from .match_where_self_arg_dtype_pass import MatchWhereSelfDtypePass # noqa
50-
from .meandim_to_averagepool_pass import ConvertMeanDimToAveragePoolPass # noqa
5150
from .mm_to_bmm_pass import ConvertMmToBmmPass # noqa
5251
from .remove_clone_pass import RemoveClonePass # noqa
5352
from .replace_scalar_with_tensor_pass import ( # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
ConvertAnyDefaultDimDimsPass,
1818
ConvertExpandCopyToRepeatPass,
1919
ConvertFullLikeToFullPass,
20-
ConvertMeanDimToAveragePoolPass,
2120
ConvertMinMaxPass,
2221
ConvertMmToBmmPass,
2322
ConvertSplitToSlicePass,
@@ -37,6 +36,7 @@
3736
DecomposeSoftmaxPass,
3837
DecomposeSoftmaxUnstablePass,
3938
DecomposeSqrtPass,
39+
DecomposeSumPass,
4040
DecomposeVarPass,
4141
FoldAndAnnotateQParamsPass,
4242
FuseBatchnorm2DPass,
@@ -45,7 +45,6 @@
4545
FuseQuantizedActivationPass,
4646
InsertRescalePass,
4747
InsertTableOpsPass,
48-
KeepDimsFalseToSqueezePass,
4948
MatchArgRanksPass,
5049
MatchWhereSelfDtypePass,
5150
QuantizeOperatorArguments,
@@ -87,7 +86,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
8786
self.add_pass(ConvertSplitToSlicePass())
8887
self.add_pass(ConvertMmToBmmPass())
8988
self.add_pass(DecomposeLinearPass())
90-
self.add_pass(ConvertMeanDimToAveragePoolPass())
89+
self.add_pass(DecomposeMeanDimPass())
9190
self.add_pass(ConvertFullLikeToFullPass())
9291
self.add_pass(ConvertToClampPass())
9392
self.add_pass(ConvertMinMaxPass())
@@ -110,7 +109,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
110109
self.add_pass(ConvertExpandCopyToRepeatPass())
111110
self.add_pass(UnsqueezeBeforeRepeatPass())
112111
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
113-
self.add_pass(KeepDimsFalseToSqueezePass())
112+
self.add_pass(DecomposeSumPass())
114113
self.add_pass(Conv1dUnsqueezePass(exported_program))
115114
self.add_pass(DecomposeSelectPass())
116115
self.add_pass(ConvertSqueezesToViewPass())
@@ -140,7 +139,6 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
140139
self.add_pass(DecomposeVarPass())
141140
self.add_pass(DecomposeMeanDimPass())
142141
self.add_pass(DecomposeNotEqualPass())
143-
self.add_pass(ConvertMeanDimToAveragePoolPass())
144142
self.add_pass(DecomposeDivPass())
145143
self.add_pass(DecomposeSoftmaxPass())
146144
self.add_pass(DecomposeGeluPass())
@@ -163,7 +161,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
163161
self.add_pass(ConvertExpandCopyToRepeatPass())
164162
self.add_pass(UnsqueezeBeforeRepeatPass())
165163
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
166-
self.add_pass(KeepDimsFalseToSqueezePass())
164+
self.add_pass(DecomposeSumPass())
167165
self.add_pass(Conv1dUnsqueezePass(exported_program))
168166
self.add_pass(DecomposeSelectPass())
169167
self.add_pass(ConvertSqueezesToViewPass())
@@ -220,4 +218,6 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
220218

221219
self.add_pass(ConvertMinMaxPass())
222220
self.add_pass(ReplaceInfValues())
221+
self.add_pass(DecomposeSumPass())
222+
223223
return self._transform(graph_module)
Lines changed: 89 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,9 @@
11
# Copyright 2024-2025 Arm Limited and/or its affiliates.
2-
# All rights reserved.
32
#
43
# This source code is licensed under the BSD-style license found in the
54
# LICENSE file in the root directory of this source tree.
65

7-
# pyre-unsafe
6+
from math import prod
87

98
import torch
109
from executorch.backends.arm._passes import ArmPass
@@ -28,42 +27,111 @@ def get_meandim_decomposition(op) -> tuple:
2827
raise RuntimeError(f"Can't get meandim decomposition for op {op}")
2928

3029

30+
def get_avgpool(op):
31+
if op == exir_ops.edge.aten.mean.dim:
32+
return exir_ops.edge.aten.avg_pool2d.default
33+
if op == torch.ops.aten.mean.dim:
34+
return torch.ops.aten.avg_pool2d.default
35+
raise RuntimeError(f"Can't get meandim decomposition for op {op}")
36+
37+
38+
def get_view(op):
39+
if op == exir_ops.edge.aten.mean.dim:
40+
return exir_ops.edge.aten.view_copy.default
41+
if op == torch.ops.aten.mean.dim:
42+
return torch.ops.aten.view_copy.default
43+
raise RuntimeError(f"Can't get meandim decomposition for op {op}")
44+
45+
3146
class DecomposeMeanDimPass(ArmPass):
3247
"""
33-
This pass decomposes meandim into a sum and mul node.
48+
Decomposes a meandim into avg_pool and/or sum + mul (1/N) depending on which dims the mean is taken for:
49+
h,w -> avg_pool
50+
n,c -> sum + mul(1/N)
51+
For rank < 4, the input is first reshaped to 4D by padding with dim=1 from the left.
3452

3553
Example:
36-
y = mean_dim(x, dim, keepdim)
54+
x = mean_dim(x, (0,2), keepdim=False) # x = (c,h,w)
3755
Becomes:
38-
sum = sum.dim_IntList(x, dim, keepdim)
39-
y = mul(sum, 1/N)
56+
x = view_copy.default(x, new_shape=(1,c,h,w)) # Reshape to work with avg_pool
57+
x = avg_pool2d.default(x, kernel=(1,w), stride=(1,1)) # Reduce w with avg_pool
58+
x = sum.dim_IntList(x, dim=1, keepdim=True) # Reduce c with sum
59+
x = mul.Tensor(x, 1/c) # Divide by number of channels to get mean
60+
x = view_copy.default(x, new_shape=(h)) # Squeeze dims since keepdim=False
4061
"""
4162

4263
def call_operator(self, op, args, kwargs, meta):
4364
if op not in (exir_ops.edge.aten.mean.dim, torch.ops.aten.mean.dim):
4465
return super().call_operator(op, args, kwargs, meta)
4566

4667
x = get_node_arg(args, 0)
47-
dim = get_node_arg(args, 1)
48-
keepdim = get_node_arg(args, 2, False)
49-
50-
# if dim == [-1, -2], mean.dim can be
51-
# decomposed to avg_pool2d. This is handled by ConvertMeanDimToAveragePool.
52-
if dim == [-1, -2]:
53-
# Simply return the mean.dim operator for future decomposition.
54-
return super().call_operator(op, args, kwargs, meta)
68+
input_shape = x.data.size()
69+
output_shape = meta["val"].size()
70+
dims_to_reduce = get_node_arg(args, 1)
71+
dims_to_reduce = [dim % len(input_shape) for dim in dims_to_reduce]
5572

56-
shape = meta["val"].size()
5773
dtype = meta["val"].dtype
58-
input_shape = x.data.size()
59-
N = 1
60-
for d in dim:
61-
N *= input_shape[d]
74+
view_op = get_view(op)
6275

76+
if len(input_shape) > 4:
77+
raise NotImplementedError(
78+
f"{op} with rank > 4 is currently not supported for the TOSA backend."
79+
)
80+
81+
# Unsqueeze to 4D
82+
if len(input_shape) < 4:
83+
pad_n = 4 - len(input_shape)
84+
new_shape = [1] * pad_n + list(input_shape)
85+
dims_to_reduce = [dim + pad_n for dim in dims_to_reduce]
86+
87+
x = super().call_operator(view_op, (x, new_shape), {}, meta, True)
88+
89+
# Reduce (h,w) by avg pool
90+
dims_to_reduce_by_avgpool = [dim for dim in dims_to_reduce if dim >= 2]
91+
x = self._reduce_by_average_pool(op, x, dims_to_reduce_by_avgpool, meta)
92+
93+
# Reduce (n, c) by reduce sum
94+
dims_to_reduce_by_sum = [dim for dim in dims_to_reduce if dim < 2]
95+
x = self._reduce_by_sum(op, x, dims_to_reduce_by_sum, meta, dtype)
96+
97+
# Reshape to correct output shape if necessary
98+
if x.data.size() != output_shape:
99+
x = super().call_operator(view_op, (x, output_shape), {}, meta, True)
100+
101+
return x
102+
103+
def _reduce_by_sum(self, op, input_node, dims, meta, dtype):
104+
if len(dims) == 0:
105+
return input_node
106+
107+
input_shape = input_node.data.size()
108+
output_shape = meta["val"].size()
109+
N = prod((n for i, n in enumerate(input_shape) if i in dims))
63110
sum_op, full_op, mul_op = get_meandim_decomposition(op)
64111

65-
sum = super().call_operator(sum_op, (x, dim, keepdim), {}, meta, True)
112+
sum = super().call_operator(sum_op, (input_node, dims, True), {}, meta, True)
66113
full = super().call_operator(
67-
full_op, ([1] * len(shape), 1 / N), {"dtype": dtype}, meta, True
114+
full_op, ([1] * len(output_shape), 1 / N), {"dtype": dtype}, meta, True
68115
)
69116
return super().call_operator(mul_op, (sum, full), {}, meta, True)
117+
118+
def _reduce_by_average_pool(self, op, input_node, dims, meta):
119+
if len(dims) == 0:
120+
return input_node
121+
122+
avgpool_op = get_avgpool(op)
123+
input_shape = input_node.data.size()
124+
125+
stride = [1, 1]
126+
if dims in ([2, 3], [3, 2]):
127+
kernel_size = [input_shape[2], input_shape[3]]
128+
elif dims == [3]:
129+
kernel_size = [1, input_shape[3]]
130+
elif dims == [2]:
131+
kernel_size = [input_shape[2], 1]
132+
else:
133+
raise RuntimeError(f"Bad dims {dims} for {op} decomposition of mean_dim.")
134+
135+
return super().call_operator(
136+
avgpool_op, (input_node, kernel_size, stride), {}, meta, True
137+
)

0 commit comments

Comments
 (0)