Skip to content

Commit f74e340

Browse files
Update base for Update on "[Executorch][target recipes] Add target based recipes for lowering models to a target device"
This diff introduces multi backend/ target based recipes to lower a model with very little code. Target recipes provide pre-configured backend recipes to use them and retarget if needed. See RFC: #13732 ## Usage ``` from executorch.export import export, ExportRecipe, IOSTargetRecipeType # CoreML + XNNPACK coreml_xnnpack_recipe = ExportRecipe.get_recipe(IOSTargetRecipeType.IOS_ARM64_COREML_FP32) session = export(model, coreml_xnnpack_recipe, example_inputs) session.save_pte_file("model.pte") ``` ## Advanced usage one can directly use `ExportRecipe.combine_recipes()` to combine specific backend recipes. ``` recipe1 = ExportRecipe.get_recipe(AndroidRecipeType.XYZ) recipe2 = ExportRecipe.get_recipe(XNNPackRecipeType.FP32) combined_recipe = ExportRecipe.combine( [recipe1, recipe2], recipe_name="multi_backend_coreml_xnnpack_fp32" ) session = export(model, combined_recipe, example_inputs) ``` Fixes: #13732 Differential Revision: [D81297451](https://our.internmc.facebook.com/intern/diff/D81297451/) [ghstack-poisoned]
2 parents a541d90 + 0b0e2dc commit f74e340

File tree

128 files changed

+4613
-1886
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+4613
-1886
lines changed

.ci/scripts/build-qnn-sdk.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ set -o xtrace
1111

1212
build_qnn_backend() {
1313
echo "Start building qnn backend."
14-
export ANDROID_NDK_ROOT=${ANDROID_NDK_ROOT:-/opt/ndk}
15-
export QNN_SDK_ROOT=${QNN_SDK_ROOT:-/tmp/qnn/2.28.0.241029}
14+
# Source QNN configuration
15+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
16+
setup_android_ndk
17+
install_qnn
1618
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
1719

1820
parallelism=$(( $(nproc) - 1 ))

.ci/scripts/setup-qnn-deps.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,5 @@ set -ex
1010
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
1111

1212
setup_libcpp 12
13+
setup_android_ndk
1314
install_qnn

.ci/scripts/test_llama.sh

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,12 @@ echo "COREML option ${COREML}"
119119

120120
if [[ "${MODE}" =~ .*qnn.* ]]; then
121121
QNN=ON
122+
123+
# Download QNN_SDK. If already downloaded, export environment path
124+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
125+
install_qnn
126+
122127
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
123-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
124128
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
125129
export PYTHONPATH=".."
126130
cp schema/program.fbs exir/_serialize/program.fbs
@@ -150,6 +154,7 @@ cmake_install_executorch_libraries() {
150154
echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
151155
rm -rf cmake-out
152156
retry cmake --preset llm \
157+
-DEXECUTORCH_BUILD_TESTS=ON \
153158
-DBUILD_TESTING=OFF \
154159
-DCMAKE_INSTALL_PREFIX=cmake-out \
155160
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
@@ -166,6 +171,7 @@ cmake_build_llama_runner() {
166171
popd
167172
dir="examples/models/llama"
168173
retry cmake \
174+
-DEXECUTORCH_BUILD_TESTS=ON \
169175
-DBUILD_TESTING=OFF \
170176
-DCMAKE_INSTALL_PREFIX=cmake-out \
171177
-DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,13 @@ set -euxo pipefail
99

1010
source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
1111

12+
# Source QNN configuration
13+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/qnn_config.sh"
14+
# Download QNN_SDK. If already downloaded, export environment path
15+
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
16+
install_qnn
17+
1218
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
13-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
1419
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
1520
export PYTHONPATH=".."
1621
cp schema/program.fbs exir/_serialize/program.fbs

.github/workflows/android-perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ jobs:
292292
export.output_name="${OUT_ET_MODEL_NAME}.pte"
293293
ls -lh "${OUT_ET_MODEL_NAME}.pte"
294294
elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
295-
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
295+
export QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072
296296
export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
297297
export PYTHONPATH=$(pwd)/..
298298
@@ -432,7 +432,7 @@ jobs:
432432
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
433433
434434
mkdir -p aar-out
435-
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
435+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.37.0.25072 EXECUTORCH_ANDROID_PROFILING=ON bash scripts/build_android_library.sh
436436
mkdir -p extension/benchmark/android/benchmark/app/libs
437437
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
438438
pushd extension/benchmark/android/benchmark

.github/workflows/android-release-artifacts.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ jobs:
104104
source backends/qualcomm/scripts/qnn_config.sh
105105
export QNN_SDK_ROOT="/tmp/qnn/${QNN_VERSION}"
106106
export ANDROID_ABIS=arm64-v8a
107-
GRADLE_ARGS+=" -DqnnVersion=2.28.0"
107+
GRADLE_ARGS+=" -DqnnVersion=2.37.0"
108108
fi
109109
110110
# Build AAR Package

.github/workflows/apple-perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,7 +230,7 @@ jobs:
230230
model.use_sdpa_with_kv_cache=true \
231231
backend.xnnpack.enabled=true \
232232
backend.xnnpack.extended_ops=true \
233-
base.preq_mode="8da4w_output_8da8w" \
233+
base.preq_mode="preq_8da4w_out_8da8w" \
234234
base.preq_group_size=32 \
235235
export.max_seq_length=2048 \
236236
export.max_context_length=2048 \
@@ -256,7 +256,7 @@ jobs:
256256
base.params="${DOWNLOADED_PATH}/params.json" \
257257
quantization.use_qat=true \
258258
base.use_lora=16 \
259-
base.preq_mode="8da4w_output_8da8w" \
259+
base.preq_mode="preq_8da4w_out_8da8w" \
260260
base.preq_group_size=32 \
261261
base.preq_embedding_quantize=\'8,0\' \
262262
model.use_sdpa_with_kv_cache=true \

backends/arm/README.md

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,38 @@ List of model specific and optional passes:
209209
- InsertCastForOpsWithInt64InputPass
210210
- Functionality:
211211
- For LLMs such as LLama, some operators like aten.embedding have int64 input. In order to lower these operators to TOSA, this pass will insert a casting node that converts the input from int64 to int32.
212-
- Example usage: backends/arm/test/models/test_llama.py
213212
- Supported Ops:
214213
- aten.embedding.default, aten.slice_copy.Tensor
214+
- Example usage:
215+
- backends/arm/test/models/test_llama.py
216+
217+
- ConvertInt64ConstOpsToInt32Pass
218+
- Functionalities:
219+
- Rewrites constant-producing ops that output int64 to instead output int32, when values are within int32 bounds.
220+
- Supported Ops:
221+
- `torch.full`, `torch.arange`, `torch.eye`, `torch.linspace`, `torch.tensor`
222+
- Example usage:
223+
- backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py
224+
- backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py
225+
226+
- ConvertInt64OutputOpsToInt32Pass
227+
- Overview:
228+
- Rewrites or removes operations that produce int64 outputs, converting them to int32 where possible.
229+
- Overflow checks are applied selectively; for ops without such checks, users need to ensure values fit within the int32 range.
230+
- Functionalities:
231+
1. Handling casting to int64:
232+
- (1) int32 -> int64:
233+
- Removes the cast and redirect uses of int64 to int32
234+
- (2) other types -> int64:
235+
- Rewrites the cast to other types -> int32
236+
- Supported Ops:
237+
- torch.ops.aten.to.\[dtype|dtype_layout\]
238+
- exir_ops.edge.dim_order_ops._to_dim_order_copy.default
239+
2. Post-process argmax outputs:
240+
- Inserts an int64->int32 cast after the argmax operations that produce int64 outputs:
241+
- Supported Ops:
242+
- torch.ops.aten.argmax.default
243+
- exir_ops.edge.aten.argmax.default
244+
- Example usage:
245+
- (Functionality 1) backends/arm/test/models/stable_diffusion/test_T5EncoderModel.py
246+
- (Functionality 2) backends/arm/test/models/stable_diffusion/test_CLIPTextModelWithProjection.py

backends/arm/_passes/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@
1414
from .cast_to_int32_pass import CastToInt32Pass # noqa
1515
from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass # noqa
1616
from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass # noqa
17+
from .convert_elu_params import ConvertELUParamsPass # noqa
1718
from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass # noqa
1819
from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass # noqa
20+
from .convert_int64_const_ops_to_int32 import ConvertInt64ConstOpsToInt32Pass # noqa
21+
from .convert_int64_output_ops_to_int32 import ConvertInt64OutputOpsToInt32Pass # noqa
1922
from .convert_int_pow_to_mul import ConvertIntPowToMuls # noqa
2023
from .convert_minmax_pass import ConvertMinMaxPass # noqa
2124
from .convert_split_to_slice import ConvertSplitToSlicePass # noqa
@@ -34,6 +37,7 @@
3437
from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass # noqa
3538
from .decompose_cumsum_pass import DecomposeCumsumPass # noqa
3639
from .decompose_div_pass import DecomposeDivPass # noqa
40+
from .decompose_elu_pass import DecomposeEluPass # noqa
3741
from .decompose_embedding_pass import DecomposeEmbeddingPass # noqa # noqa
3842
from .decompose_expm1_pass import DecomposeExpm1Pass # noqa
3943
from .decompose_gelu_pass import DecomposeGeluPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
ComputeConstantOpsAOT,
1919
Conv1dUnsqueezePass,
2020
ConvertAnyDefaultDimDimsPass,
21+
ConvertELUParamsPass,
2122
ConvertExpandCopyToRepeatPass,
2223
ConvertFullLikeToFullPass,
24+
ConvertInt64ConstOpsToInt32Pass,
25+
ConvertInt64OutputOpsToInt32Pass,
2326
ConvertIntPowToMuls,
2427
ConvertMinMaxPass,
2528
ConvertMmToBmmPass,
@@ -39,6 +42,7 @@
3942
DecomposeCosineSimilarityPass,
4043
DecomposeCumsumPass,
4144
DecomposeDivPass,
45+
DecomposeEluPass,
4246
DecomposeEmbeddingPass,
4347
DecomposeExpm1Pass,
4448
DecomposeGeluPass,
@@ -98,6 +102,7 @@
98102
from executorch.backends.transforms.remove_getitem_op import RemoveGetItemPass
99103
from executorch.exir import ExportedProgram
100104
from executorch.exir.pass_manager import PassManager
105+
from executorch.exir.passes.remove_graph_asserts_pass import RemoveGraphAssertsPass
101106
from torch.fx import GraphModule
102107

103108

@@ -132,6 +137,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
132137
self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
133138
self.add_pass(AnnotateDecomposedMatmulPass())
134139
self.add_pass(QuantizeOperatorArguments())
140+
self.add_pass(ConvertELUParamsPass())
135141
self.add_pass(FoldAndAnnotateQParamsPass(exported_program)) # type: ignore[call-arg]
136142
self.add_pass(RetraceFoldedDtypesPass())
137143
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
@@ -180,6 +186,8 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
180186
self.add_pass(DecomposeAtanPass())
181187
self.add_pass(DecomposeAtanhPass())
182188
self.add_pass(DecomposeAddmmPass())
189+
self.add_pass(DecomposeEluPass())
190+
self.add_pass(DecomposeExpm1Pass())
183191
self.add_pass(ConvertIntPowToMuls())
184192
self.add_pass(CastBoolToInt8Pass())
185193
self.add_pass(DecomposeSinhPass())
@@ -258,6 +266,11 @@ def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
258266
)
259267

260268
def transform_for_annotation_pipeline(self, graph_module: GraphModule):
269+
self.add_pass(
270+
RemoveGraphAssertsPass()
271+
) # ConvertInt64ConstOpsToInt32Pass requires this pass to remove the assertion in the graph
272+
self.add_pass(ConvertInt64ConstOpsToInt32Pass())
273+
self.add_pass(ConvertInt64OutputOpsToInt32Pass())
261274
self.add_pass(InsertCastForOpsWithInt64InputPass())
262275
self.add_pass(DecomposeEmbeddingPass())
263276
self.add_pass(DecomposeScaledDotProductAttention())

0 commit comments

Comments
 (0)