Skip to content

Commit fc9c722

Browse files
authored
Merge branch 'main' into openvino_backend
2 parents 9dd9b26 + 8b948e8 commit fc9c722

File tree

108 files changed

+4402
-1415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+4402
-1415
lines changed

.ci/scripts/test_model.sh

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,15 @@ test_model() {
9696
bash examples/models/llama/install_requirements.sh
9797
# Test export_llama script: python3 -m examples.models.llama.export_llama.
9898
# Use Llama random checkpoint with Qwen 2.5 1.5b model configuration.
99-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/qwen2_5/1_5b_config.json
99+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -p examples/models/qwen2_5/1_5b_config.json
100100
rm "./${MODEL_NAME}.pte"
101101
return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
102102
fi
103103
if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then
104104
# Install requirements for export_llama
105105
bash examples/models/llama/install_requirements.sh
106106
# Test export_llama script: python3 -m examples.models.llama.export_llama.
107-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json
107+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -p examples/models/phi_4_mini/config.json
108108
run_portable_executor_runner
109109
rm "./${MODEL_NAME}.pte"
110110
return
@@ -224,19 +224,22 @@ test_model_with_coreml() {
224224

225225
"${PYTHON_EXECUTABLE}" -m examples.apple.coreml.scripts.export --model_name="${MODEL_NAME}" --compute_precision "${DTYPE}"
226226
EXPORTED_MODEL=$(find "." -type f -name "${MODEL_NAME}*.pte" -print -quit)
227-
# TODO:
227+
228228
if [ -n "$EXPORTED_MODEL" ]; then
229229
EXPORTED_MODEL_WITH_DTYPE="${EXPORTED_MODEL%.pte}_${DTYPE}.pte"
230230
mv "$EXPORTED_MODEL" "$EXPORTED_MODEL_WITH_DTYPE"
231231
EXPORTED_MODEL="$EXPORTED_MODEL_WITH_DTYPE"
232-
echo "Renamed file path: $EXPORTED_MODEL"
232+
echo "OK exported model: $EXPORTED_MODEL"
233233
else
234-
echo "No .pte file found"
234+
echo "[error] failed to export model: no .pte file found"
235235
exit 1
236236
fi
237237

238238
# Run the model
239239
if [ "${should_test}" = true ]; then
240+
echo "Installing requirements needed to build coreml_executor_runner..."
241+
backends/apple/coreml/scripts/install_requirements.sh
242+
240243
echo "Testing exported model with coreml_executor_runner..."
241244
local out_dir=$(mktemp -d)
242245
COREML_EXECUTOR_RUNNER_OUT_DIR="${out_dir}" examples/apple/coreml/scripts/build_executor_runner.sh

.ci/scripts/wheel/test_macos.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,9 @@
1515
model=Model.Mv3,
1616
backend=Backend.XnnpackQuantizationDelegation,
1717
),
18-
# Enable this once CoreML is suppported out-of-the-box
19-
# https://github.com/pytorch/executorch/issues/9019
20-
# test_base.ModelTest(
21-
# model=Model.Mv3,
22-
# backend=Backend.CoreMlTest,
23-
# )
18+
test_base.ModelTest(
19+
model=Model.Mv3,
20+
backend=Backend.CoreMlTest,
21+
),
2422
]
2523
)

.github/workflows/build-wheels-linux.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ on:
66
paths:
77
- .ci/**/*
88
- .github/workflows/build-wheels-linux.yml
9+
- examples/**/*
10+
- pyproject.toml
11+
- setup.py
912
push:
1013
branches:
1114
- nightly

.github/workflows/build-wheels-macos.yml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@ on:
66
paths:
77
- .ci/**/*
88
- .github/workflows/build-wheels-macos.yml
9+
- examples/**/*
10+
- pyproject.toml
11+
- setup.py
912
push:
1013
branches:
1114
- nightly
@@ -57,6 +60,8 @@ jobs:
5760
pre-script: ${{ matrix.pre-script }}
5861
post-script: ${{ matrix.post-script }}
5962
package-name: ${{ matrix.package-name }}
60-
runner-type: macos-m1-stable
63+
# Meta's macOS runners do not have Xcode, so use GitHub's runners.
64+
runner-type: macos-latest-xlarge
65+
setup-miniconda: true
6166
smoke-test-script: ${{ matrix.smoke-test-script }}
6267
trigger-event: ${{ github.event_name }}

.gitmodules

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
[submodule "backends/arm/third-party/ethos-u-core-driver"]
22
path = backends/arm/third-party/ethos-u-core-driver
3-
url = https://github.com/pytorch-labs/ethos-u-core-driver-mirror
3+
url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
44
[submodule "backends/arm/third-party/serialization_lib"]
55
path = backends/arm/third-party/serialization_lib
6-
url = https://github.com/pytorch-labs/tosa_serialization_lib-mirror
6+
url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
77
[submodule "backends/vulkan/third-party/Vulkan-Headers"]
88
path = backends/vulkan/third-party/Vulkan-Headers
99
url = https://github.com/KhronosGroup/Vulkan-Headers

backends/arm/README.md

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -79,30 +79,12 @@ test # Root test folder
7979

8080
Some example commands to run these tests follow. Run a single test:
8181

82-
```
83-
python -m unittest backends.arm.test.ops.test_add.TestSimpleAdd -k test_add2_tosa_BI
84-
```
85-
86-
or with pytest
87-
8882
```
8983
pytest -c /dev/null -v -n auto backends/arm/test/ops/test_add.py -k test_add2_tosa_BI
9084
```
9185

92-
Or all tests in "TestSimpleAdd":
93-
94-
```
95-
python -m unittest backends.arm.test.ops.test_add.TestSimpleAdd
96-
```
97-
9886
Or discover and run many tests:
9987

100-
```
101-
python -m unittest discover -s backends/arm/test/ops/
102-
```
103-
104-
or with pytest
105-
10688
```
10789
pytest -c /dev/null -v -n auto backends/arm/test/ops/
10890
```

backends/arm/_passes/__init__.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
7+
from . import arm_pass_utils # noqa
8+
from .annotate_channels_last_dim_order_pass import AnnotateChannelsLastDimOrder # noqa
9+
from .annotate_decomposed_matmul import AnnotateDecomposedMatmulPass # noqa
10+
from .cast_int64_pass import CastInt64BuffersToInt32Pass # noqa
11+
from .cast_to_int32_pass import CastToInt32Pass # noqa
12+
from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass # noqa
13+
from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass # noqa
14+
from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass # noqa
15+
from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass # noqa
16+
from .convert_minmax_pass import ConvertMinMaxPass # noqa
17+
from .convert_split_to_slice import ConvertSplitToSlicePass # noqa
18+
from .convert_squeezes_to_view import ConvertSqueezesToViewPass # noqa
19+
from .convert_to_clamp import ConvertToClampPass # noqa
20+
from .decompose_batchnorm_pass import DecomposeBatchNormPass # noqa
21+
from .decompose_div_pass import DecomposeDivPass # noqa
22+
from .decompose_layernorm_pass import DecomposeLayerNormPass # noqa
23+
from .decompose_linear_pass import DecomposeLinearPass # noqa
24+
from .decompose_meandim_pass import DecomposeMeanDimPass # noqa
25+
from .decompose_select import DecomposeSelectPass # noqa
26+
from .decompose_softmax_pass import DecomposeSoftmaxPass # noqa
27+
from .decompose_softmax_unstable_pass import DecomposeSoftmaxUnstablePass # noqa
28+
from .decompose_var_pass import DecomposeVarPass # noqa
29+
from .fold_qdq_with_annotated_qparams_pass import ( # noqa
30+
FoldAndAnnotateQParamsPass,
31+
QuantizeOperatorArguments,
32+
RetraceFoldedDtypesPass,
33+
)
34+
from .fuse_batchnorm2d_pass import FuseBatchnorm2DPass # noqa
35+
from .fuse_constant_ops_pass import ComputeConstantOpsAOT, FuseConstantArgsPass # noqa
36+
from .fuse_quantized_activation_pass import FuseQuantizedActivationPass # noqa
37+
from .insert_rescales_pass import InsertRescalePass # noqa
38+
from .insert_table_ops import InsertTableOpsPass # noqa
39+
from .keep_dims_false_to_squeeze_pass import KeepDimsFalseToSqueezePass # noqa
40+
from .match_arg_ranks_pass import MatchArgRanksPass # noqa
41+
from .meandim_to_averagepool_pass import ConvertMeanDimToAveragePoolPass # noqa
42+
from .mm_to_bmm_pass import ConvertMmToBmmPass # noqa
43+
from .remove_clone_pass import RemoveClonePass # noqa
44+
from .scalars_to_attribute_pass import ScalarsToAttributePass # noqa
45+
from .size_adjust_conv2d_pass import SizeAdjustConv2DPass # noqa
46+
from .unsqueeze_before_repeat_pass import UnsqueezeBeforeRepeatPass # noqa
47+
from .unsqueeze_scalar_placeholders_pass import UnsqueezeScalarPlaceholdersPass # noqa
48+
from .arm_pass_manager import ArmPassManager # noqa # usort: skip

backends/arm/_passes/arm_pass_manager.py

Lines changed: 26 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -7,82 +7,45 @@
77

88
# pyre-unsafe
99

10-
from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
10+
from executorch.backends.arm._passes import (
1111
AnnotateChannelsLastDimOrder,
12-
)
13-
from executorch.backends.arm._passes.annotate_decomposed_matmul import (
1412
AnnotateDecomposedMatmulPass,
15-
)
16-
from executorch.backends.arm._passes.cast_int64_pass import CastInt64ToInt32Pass
17-
from executorch.backends.arm._passes.conv1d_unsqueeze_pass import Conv1dUnsqueezePass
18-
from executorch.backends.arm._passes.convert_any_default_dim_dims_pass import (
13+
CastInt64BuffersToInt32Pass,
14+
CastToInt32Pass,
15+
ComputeConstantOpsAOT,
16+
Conv1dUnsqueezePass,
1917
ConvertAnyDefaultDimDimsPass,
20-
)
21-
from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
2218
ConvertExpandCopyToRepeatPass,
23-
)
24-
from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
2519
ConvertFullLikeToFullPass,
26-
)
27-
from executorch.backends.arm._passes.convert_minmax_pass import ConvertMinMaxPass
28-
from executorch.backends.arm._passes.convert_split_to_slice import (
20+
ConvertMeanDimToAveragePoolPass,
21+
ConvertMinMaxPass,
22+
ConvertMmToBmmPass,
2923
ConvertSplitToSlicePass,
30-
)
31-
from executorch.backends.arm._passes.convert_squeezes_to_view import ( # type: ignore[import-not-found]
3224
ConvertSqueezesToViewPass,
33-
)
34-
from executorch.backends.arm._passes.convert_to_clamp import ConvertToClampPass
35-
from executorch.backends.arm._passes.decompose_batchnorm_pass import (
25+
ConvertToClampPass,
3626
DecomposeBatchNormPass,
37-
)
38-
from executorch.backends.arm._passes.decompose_div_pass import DecomposeDivPass
39-
from executorch.backends.arm._passes.decompose_layernorm_pass import (
27+
DecomposeDivPass,
4028
DecomposeLayerNormPass,
41-
)
42-
from executorch.backends.arm._passes.decompose_linear_pass import DecomposeLinearPass
43-
from executorch.backends.arm._passes.decompose_meandim_pass import DecomposeMeanDimPass
44-
from executorch.backends.arm._passes.decompose_select import ( # type: ignore[import-not-found]
29+
DecomposeLinearPass,
30+
DecomposeMeanDimPass,
4531
DecomposeSelectPass,
46-
)
47-
from executorch.backends.arm._passes.decompose_softmax_pass import DecomposeSoftmaxPass
48-
from executorch.backends.arm._passes.decompose_softmax_unstable_pass import (
32+
DecomposeSoftmaxPass,
4933
DecomposeSoftmaxUnstablePass,
50-
)
51-
from executorch.backends.arm._passes.decompose_var_pass import DecomposeVarPass
52-
from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
34+
DecomposeVarPass,
5335
FoldAndAnnotateQParamsPass,
54-
QuantizeOperatorArguments,
55-
RetraceFoldedDtypesPass,
56-
)
57-
from executorch.backends.arm._passes.fuse_batchnorm2d_pass import FuseBatchnorm2DPass
58-
from executorch.backends.arm._passes.fuse_constant_ops_pass import (
59-
ComputeConstantOpsAOT,
36+
FuseBatchnorm2DPass,
6037
FuseConstantArgsPass,
61-
)
62-
from executorch.backends.arm._passes.fuse_quantized_activation_pass import ( # type: ignore[import-not-found]
6338
FuseQuantizedActivationPass,
64-
)
65-
from executorch.backends.arm._passes.insert_rescales_pass import InsertRescalePass
66-
from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
67-
from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (
39+
InsertRescalePass,
40+
InsertTableOpsPass,
6841
KeepDimsFalseToSqueezePass,
69-
)
70-
from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
71-
from executorch.backends.arm._passes.meandim_to_averagepool_pass import ( # type: ignore[attr-defined]
72-
ConvertMeanDimToAveragePoolPass,
73-
)
74-
from executorch.backends.arm._passes.mm_to_bmm_pass import ( # type: ignore[import-not-found]
75-
ConvertMmToBmmPass,
76-
)
77-
from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
78-
from executorch.backends.arm._passes.scalars_to_attribute_pass import (
42+
MatchArgRanksPass,
43+
QuantizeOperatorArguments,
44+
RemoveClonePass,
45+
RetraceFoldedDtypesPass,
7946
ScalarsToAttributePass,
80-
)
81-
from executorch.backends.arm._passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
82-
from executorch.backends.arm._passes.unsqueeze_before_repeat_pass import (
47+
SizeAdjustConv2DPass,
8348
UnsqueezeBeforeRepeatPass,
84-
)
85-
from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
8649
UnsqueezeScalarPlaceholdersPass,
8750
)
8851
from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
@@ -118,6 +81,8 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
11881
self.add_pass(ConvertToClampPass())
11982
self.add_pass(ConvertMinMaxPass())
12083
self.add_pass(ConvertAnyDefaultDimDimsPass())
84+
if isinstance(self.tosa_spec, Tosa_0_80) and self.tosa_spec.is_U55_subset:
85+
self.add_pass(CastToInt32Pass())
12186

12287
self.add_pass(ReplaceScalarWithTensorArgPass())
12388
self.add_pass(AnnotateDecomposedMatmulPass())
@@ -132,7 +97,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
13297
self.add_pass(SizeAdjustConv2DPass())
13398
self.add_pass(ConvertExpandCopyToRepeatPass())
13499
self.add_pass(UnsqueezeBeforeRepeatPass())
135-
self.add_pass(CastInt64ToInt32Pass(exported_program))
100+
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
136101
self.add_pass(KeepDimsFalseToSqueezePass())
137102
self.add_pass(Conv1dUnsqueezePass(exported_program))
138103
self.add_pass(DecomposeSelectPass())
@@ -179,7 +144,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
179144
self.add_pass(SizeAdjustConv2DPass())
180145
self.add_pass(ConvertExpandCopyToRepeatPass())
181146
self.add_pass(UnsqueezeBeforeRepeatPass())
182-
self.add_pass(CastInt64ToInt32Pass(exported_program))
147+
self.add_pass(CastInt64BuffersToInt32Pass(exported_program))
183148
self.add_pass(KeepDimsFalseToSqueezePass())
184149
self.add_pass(Conv1dUnsqueezePass(exported_program))
185150
self.add_pass(DecomposeSelectPass())

backends/arm/_passes/cast_int64_pass.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515
logger.setLevel(logging.WARNING)
1616

1717

18-
class CastInt64ToInt32Pass(ExportPass):
18+
class CastInt64BuffersToInt32Pass(ExportPass):
1919
"""
2020
Cast int64 buffers to int32 if the int64 data is in int32 range.
2121
"""
2222

2323
def __init__(self, exported_program: torch.export.ExportedProgram):
24-
super(CastInt64ToInt32Pass, self).__init__()
24+
super(CastInt64BuffersToInt32Pass, self).__init__()
2525
self.exported_program = exported_program
2626

2727
def _assert_within_int32(self, tensor: torch.Tensor, node: torch.fx.Node):
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
8+
from executorch.exir.dialects._ops import ops as exir_ops
9+
from executorch.exir.pass_base import ExportPass
10+
11+
12+
class CastToInt32Pass(ExportPass):
    """Run the targeted shift operators on int32 operands.

    For each operator listed in ``targeted_ops``, every argument whose dtype
    is not ``torch.int32`` is first copied to int32. If at least one argument
    had to be copied, the operator's result is copied back to the dtype of the
    first original argument. All other operators are forwarded untouched.
    """

    # Operators rewritten by this pass; anything else is passed through.
    targeted_ops = {
        exir_ops.edge.aten.bitwise_left_shift.Tensor,
        exir_ops.edge.aten.bitwise_right_shift.Tensor,
    }

    def call_operator(self, op, args, kwargs, meta):
        """Emit ``op``, wrapping it in int32 casts when it is a targeted op.

        Returns whatever the parent ``call_operator`` returns for the last
        emitted node: the op itself, or the cast-back copy of its output.
        """
        # Bind the parent implementation once; every node below goes through it.
        emit = super().call_operator

        if op not in self.targeted_ops:
            return emit(op, args, kwargs, meta)

        to_copy = exir_ops.edge.dim_order_ops._to_dim_order_copy.default

        # One flag per argument: True when that argument is not int32 yet.
        needs_cast = [arg.data.dtype != torch.int32 for arg in args]

        # Casts are emitted in argument order, before the operator itself.
        int32_args = tuple(
            emit(to_copy, (arg,), {"dtype": torch.int32}, meta) if cast else arg
            for arg, cast in zip(args, needs_cast)
        )

        # The rewritten operator is emitted without keyword arguments.
        result = emit(op, int32_args, {}, meta)

        if not any(needs_cast):
            return result

        # NOTE(review): the restore dtype is taken from the first argument
        # only; confirm this is intended when the arguments' dtypes differ.
        return emit(to_copy, (result,), {"dtype": args[0].data.dtype}, meta)

0 commit comments

Comments
 (0)