Commit eed5f35
Merge branch 'main' into mergennachin-patch-2
2 parents: 1a19faf + c671b92

245 files changed: +8532 −2436 lines


.ci/scripts/test_huggingface_optimum_model.py
Lines changed: 9 additions & 3 deletions

@@ -262,14 +262,20 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
 
     assert torch.allclose(
         eager_output.logits, et_output, atol=1e-02, rtol=1e-02
-    ), "CoreML output does not match eager"
+    ), "Model output does not match eager"
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--model", type=str, required=True)
     parser.add_argument("--recipe", type=str, required=True)
     parser.add_argument("--quantize", action="store_true", help="Enable quantization")
+    parser.add_argument(
+        "--model_dir",
+        type=str,
+        required=False,
+        help="When provided, write the pte file to this directory. Otherwise, a temporary directory is created for the test.",
+    )
     args = parser.parse_args()
 
     model_to_model_id_and_test_function = {
@@ -294,11 +300,11 @@ def test_vit(model_id, model_dir, recipe, *, quantize=False, run_only=False):
            f"Unknown model name: {args.model}. Available models: {model_to_model_id_and_test_function.keys()}"
        )
 
+    model_id, test_fn = model_to_model_id_and_test_function[args.model]
    with tempfile.TemporaryDirectory() as tmp_dir:
-        model_id, test_fn = model_to_model_id_and_test_function[args.model]
        test_fn(
            model_id=model_id,
-            model_dir=tmp_dir,
+            model_dir=tmp_dir if args.model_dir is None else args.model_dir,
            recipe=args.recipe,
            quantize=args.quantize,
        )
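
The new flag only changes where the pte artifact lands; everything else in the test flow is unchanged. A minimal sketch of the fallback behavior in isolation (the argument value "./artifacts" is hypothetical, not from the diff):

# Sketch of the --model_dir fallback added above; not the full CI script.
import argparse
import tempfile

parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str, required=False)
args = parser.parse_args(["--model_dir", "./artifacts"])  # hypothetical CLI input

with tempfile.TemporaryDirectory() as tmp_dir:
    # Same expression as the diff: prefer the user-supplied directory,
    # otherwise fall back to a throwaway temp dir scoped to the test.
    model_dir = tmp_dir if args.model_dir is None else args.model_dir
    print(model_dir)  # ./artifacts here; a temp path when the flag is omitted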

.ci/scripts/test_model.sh
Lines changed: 28 additions & 19 deletions

@@ -166,34 +166,51 @@ test_model_with_qnn() {
   export PYTHONPATH=$EXECUTORCH_ROOT/..
 
   EXTRA_FLAGS=""
+  # Ordered by the folder name, then alphabetically by the model name
+  # Following models are inside examples/qualcomm/scripts folder
   if [[ "${MODEL_NAME}" == "dl3" ]]; then
     EXPORT_SCRIPT=deeplab_v3
-  elif [[ "${MODEL_NAME}" == "mv3" ]]; then
-    EXPORT_SCRIPT=mobilenet_v3
-  elif [[ "${MODEL_NAME}" == "mv2" ]]; then
-    EXPORT_SCRIPT=mobilenet_v2
-  elif [[ "${MODEL_NAME}" == "ic4" ]]; then
-    EXPORT_SCRIPT=inception_v4
+  elif [[ "${MODEL_NAME}" == "edsr" ]]; then
+    EXPORT_SCRIPT=edsr
+    # Additional deps for edsr
+    pip install piq
   elif [[ "${MODEL_NAME}" == "ic3" ]]; then
     EXPORT_SCRIPT=inception_v3
-  elif [[ "${MODEL_NAME}" == "vit" ]]; then
-    EXPORT_SCRIPT=torchvision_vit
+  elif [[ "${MODEL_NAME}" == "ic4" ]]; then
+    EXPORT_SCRIPT=inception_v4
   elif [[ "${MODEL_NAME}" == "mb" ]]; then
     EXPORT_SCRIPT=mobilebert_fine_tune
     EXTRA_FLAGS="--num_epochs 1"
     pip install scikit-learn
+  elif [[ "${MODEL_NAME}" == "mv2" ]]; then
+    EXPORT_SCRIPT=mobilenet_v2
+  elif [[ "${MODEL_NAME}" == "mv3" ]]; then
+    EXPORT_SCRIPT=mobilenet_v3
+  elif [[ "${MODEL_NAME}" == "vit" ]]; then
+    EXPORT_SCRIPT=torchvision_vit
   elif [[ "${MODEL_NAME}" == "w2l" ]]; then
     EXPORT_SCRIPT=wav2letter
   elif [[ "${MODEL_NAME}" == "edsr" ]]; then
     EXPORT_SCRIPT=edsr
     # Additional deps for edsr
     pip install piq
+  # Following models are inside examples/qualcomm/oss_scripts folder
+  elif [[ "${MODEL_NAME}" == "albert" ]]; then
+    EXPORT_SCRIPT=albert
+  elif [[ "${MODEL_NAME}" == "bert" ]]; then
+    EXPORT_SCRIPT=bert
+  elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
+    EXPORT_SCRIPT=conv_former
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
+  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
+    EXPORT_SCRIPT=distilbert
   elif [[ "${MODEL_NAME}" == "dit" ]]; then
     EXPORT_SCRIPT=dit
   elif [[ "${MODEL_NAME}" == "efficientnet" ]]; then
     EXPORT_SCRIPT=efficientnet
+  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
+    EXPORT_SCRIPT=eurobert
   elif [[ "${MODEL_NAME}" == "focalnet" ]]; then
     EXPORT_SCRIPT=focalnet
   elif [[ "${MODEL_NAME}" == "mobilevit_v1" ]]; then
@@ -202,18 +219,10 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=mobilevit_v2
   elif [[ "${MODEL_NAME}" == "pvt" ]]; then
     EXPORT_SCRIPT=pvt
-  elif [[ "${MODEL_NAME}" == "swin" ]]; then
-    EXPORT_SCRIPT=swin_transformer
-  elif [[ "${MODEL_NAME}" == "albert" ]]; then
-    EXPORT_SCRIPT=albert
-  elif [[ "${MODEL_NAME}" == "bert" ]]; then
-    EXPORT_SCRIPT=bert
-  elif [[ "${MODEL_NAME}" == "distilbert" ]]; then
-    EXPORT_SCRIPT=distilbert
-  elif [[ "${MODEL_NAME}" == "eurobert" ]]; then
-    EXPORT_SCRIPT=eurobert
   elif [[ "${MODEL_NAME}" == "roberta" ]]; then
     EXPORT_SCRIPT=roberta
+  elif [[ "${MODEL_NAME}" == "swin" ]]; then
+    EXPORT_SCRIPT=swin_transformer
   else
     echo "Unsupported model $MODEL_NAME"
     exit 1
@@ -231,7 +240,7 @@ test_model_with_qnn() {
     "cvt"|"dit"|"focalnet"|"mobilevit_v2"|"pvt"|"swin")
       SCRIPT_FOLDER=oss_scripts
       ;;
-    "albert"|"bert"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
+    "albert"|"bert"|"conv_former"|"distilbert"|"roberta"|"efficientnet"|"mobilevit_v1")
      pip install evaluate
      SCRIPT_FOLDER=oss_scripts
      # 16bit models will encounter op validation fail on some operations,
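
The reordered if/elif chain maps each short model name to an export script, and the later case statement picks the script folder. A condensed table view of the resulting lookup, as a sketch (a few representative entries only; the examples/qualcomm path layout is taken from the comments in the diff and is an assumption):

# Hypothetical table form of the bash dispatch above:
# short name -> (export script, folder under examples/qualcomm).
QNN_EXPORT_SCRIPTS = {
    "dl3": ("deeplab_v3", "scripts"),
    "edsr": ("edsr", "scripts"),
    "albert": ("albert", "oss_scripts"),
    "conv_former": ("conv_former", "oss_scripts"),
    "swin": ("swin_transformer", "oss_scripts"),
}

def export_script_path(model_name: str) -> str:
    script, folder = QNN_EXPORT_SCRIPTS[model_name]
    return f"examples/qualcomm/{folder}/{script}"

print(export_script_path("conv_former"))  # examples/qualcomm/oss_scripts/conv_former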

.ci/scripts/test_qnn_static_llama.sh
Lines changed: 3 additions & 3 deletions

@@ -33,12 +33,12 @@ echo "Creating tokenizer.bin"
 $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 set +e
-# Compile only as weight sharing is not applicable on x86
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --llama_artifacts . --compile_only
+# Compile only as weight sharing is not applicable on x86.
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
 exit_code1=$?
 
 # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
 exit_code2=$?
 
 # Check BC

.github/workflows/trunk.yml
Lines changed: 67 additions & 10 deletions

@@ -60,7 +60,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       matrix:
-        model: [add]
+        model: [add, softmax, mv2]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -72,31 +72,85 @@ jobs:
       MODEL_NAME=${{ matrix.model }}
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"
+      if [[ ${{ matrix.model }} == "add" ]]; then
+        SIM_LIMIT_SEC=60
+      elif [[ ${{ matrix.model }} == "softmax" ]]; then
+        SIM_LIMIT_SEC=60
+      elif [[ ${{ matrix.model }} == "mv2" ]]; then
+        SIM_LIMIT_SEC=5000
+      else
+        echo "Failed unsupported model selection ${{ matrix.model }}"
+        exit 1
+      fi
 
      source .ci/scripts/utils.sh
      source .ci/scripts/zephyr-utils.sh
      mkdir -p zephyr_scratch/
      cd zephyr_scratch
      export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
+      export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials
 
+      # TODO @Bujji: Should see if this can be moved into the docker image itself
      download_arm_zephyr_sdk
      ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
-
      cd $ZEPHYR_PROJ_ROOT
      setup_zephyr_et_module
 
+      # Run setup scripts for Arm FVP and Arm AOT Compilation
      cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
      install_executorch "--use-pt-pinned-commit"
      .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
      source examples/arm/ethos-u-scratch/setup_path.sh
      source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
-      cd $ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm/hello_world
-      west build -p always -b mps3/corstone300/fvp
-      FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 -C mps3_board.uart0.out_file='sim.out' -C cpu0.CFGITCMSZ=15 -C cpu0.CFGDTCMSZ=15 --simlimit 120
 
-      grep -qF "Output[0][0]: (float) 2.000000" sim.out
+      # Get the model as PTE
+      python -m examples.arm.aot_arm_compiler \
+        --model_name="${MODEL_NAME}" \
+        --output="${MODEL_NAME}.pte"
+
+      # Generate the C-style header
+      cd $ARM_FVP_TUTORIALS_ROOT
+      python build_model.py \
+        --executorch-root $ZEPHYR_PROJ_ROOT/modules/lib/executorch \
+        --pte-file $ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte \
+        --output-path $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/src/
+
+      cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/
+
+      # Build the zephyr elf
+      west build -p always -b mps3/corstone300/fvp -- \
+        -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte
+
+      # Run the simulation
+      FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
+        -C mps3_board.visualisation.disable-visualisation=1 \
+        -C mps3_board.telnetterminal0.start_telnet=0 \
+        -C mps3_board.uart0.out_file='sim.out' \
+        -C cpu0.CFGITCMSZ=15 \
+        -C cpu0.CFGDTCMSZ=15 \
+        --simlimit ${SIM_LIMIT_SEC}
+
+      # Disable exit on error
+      set +e
+      # Report failure if any of the output verification checks fail
+      grep -qF "ERROR" sim.out
+      exit_status=$? # store 0 if found (failure), 1 if not (success)
+      if [[ "$exit_status" -eq "0" ]]; then
+        cat sim.out
+        set -e
+        exit 1
+      fi
+
+      # Report failure if the simulation does not complete successfully
+      grep -qF "SUCCESS: Program complete, exiting." sim.out
      exit_status=$? # store 0 if found (success), 1 if not (failure)
-      exit $exit_status
+      if [[ "$exit_status" -eq "1" ]]; then
+        cat sim.out
+        set -e
+        exit 1
+      fi
+      # Re-enable exit on error
+      set -e
 
  test-models-linux-aarch64:
    name: test-models-linux-aarch64
@@ -285,12 +339,12 @@ jobs:
      setup_script_args=""
      if [[ ${{ matrix.os }} == "bare_metal" ]]; then
        toolchain_prefix=arm-none-eabi-
-        threshold="109000"
+        threshold="110592" # 108 KiB
        toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
      elif [[ ${{ matrix.os }} == "zephyr-preset" ]]; then
        setup_script_args="--target-toolchain zephyr"
        toolchain_prefix=arm-zephyr-eabi-
-        threshold="135000"
+        threshold="135168" # 132 KiB
        toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
      else
        echo "Fail unsupport OS selection ${{ matrix.os }}"
@@ -568,7 +622,7 @@ jobs:
    strategy:
      matrix:
        dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
+        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
      fail-fast: false
    with:
      runner: linux.2xlarge
@@ -815,6 +869,9 @@ jobs:
          smollm|coreml_fp32_gpu|--quantize,
          llama3|coreml_fp32_gpu|--quantize,
          olmo|coreml_fp32_gpu|--quantize,
+          # roberta|coreml_fp32_gpu|--quantize, roberta requires special HF access
+          bert|coreml_fp32_gpu|--quantize,
+          distilbert|coreml_fp32_gpu|--quantize,
        ]
      fail-fast: false
    with:
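
The workflow's new pass/fail contract for the FVP run is two greps over sim.out: any "ERROR" line fails the job, and a missing completion marker also fails it even if no error was printed (e.g. when --simlimit expires). The same logic restated in plain Python, as a sketch for local log triage (not part of the workflow):

# Mirrors the grep-based checks in the workflow step above.
def fvp_run_passed(sim_log: str) -> bool:
    if "ERROR" in sim_log:  # any output-verification failure aborts the job
        return False
    # the firmware must also print an explicit completion marker
    return "SUCCESS: Program complete, exiting." in sim_log

assert fvp_run_passed("... SUCCESS: Program complete, exiting.")
assert not fvp_run_passed("ERROR: output mismatch")
assert not fvp_run_passed("simulation stopped at simlimit")  # no marker -> fail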

backends/apple/coreml/TARGETS
Lines changed: 21 additions & 0 deletions

@@ -60,6 +60,26 @@ runtime.python_library(
     ],
 )
 
+runtime.python_library(
+    name = "recipes",
+    srcs = glob([
+        "recipes/*.py",
+    ]),
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
+        ":backend",
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/exir/backend:compile_spec_schema",
+        "//executorch/exir/backend:partitioner",
+        "//executorch/exir/backend:utils",
+        "//executorch/export:lib",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -103,6 +123,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
+        ":recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
     ],

backends/apple/coreml/compiler/torch_ops.py
Lines changed: 41 additions & 1 deletion

@@ -8,6 +8,7 @@
 # coremltools than is used by ExecuTorch. Each op registered here should have a link to a PR in coremltools that adds
 # the op to the coremltools library.
 
+import numpy as np
 import torch as _torch
 from coremltools import _logger
 from coremltools.converters.mil.frontend import _utils
@@ -21,7 +22,6 @@
     transpose,
     unbind,
 )
-
 from coremltools.converters.mil.frontend.torch.torch_op_registry import (
     register_torch_op,
 )
@@ -132,3 +132,43 @@ def dequantize_affine(context, node):
         name=node.name,
     )
     context.add(output, node.name)
+
+
+@register_torch_op(
+    torch_alias=["quant::dequantize_codebook", "quant.dequantize_codebook"],
+    override=False,
+)
+def dequantize_codebook(context, node):
+    inputs = _get_inputs(context, node, expected=[4, 5])
+    codes = inputs[0].val
+    codebook = inputs[1].val
+    nbits = inputs[2].val
+
+    # information in block_size is redundant with codebook.shape
+    block_size = inputs[3].val  # noqa: F841
+
+    assert len(codes.shape) == 2, "Only rank 2 inputs are supported"
+
+    # Assert codebook is as expected. codebook.dim() = codes.dim() + 2
+    assert len(codebook.shape) == 4, "Only rank 4 inputs are supported for codebook"
+    assert codebook.shape[0] == 1, "Only grouped_channel granularity is supported"
+    n_luts = codebook.shape[1]
+    assert (
+        codes.shape[1] % n_luts == 0
+    ), "codes.shape[1] must be divisible by codebook.shape[1]"
+    assert codebook.shape[2] == 2**nbits
+    assert codebook.shape[3] == 1, "Only scalar look up values are supported"
+
+    if len(inputs) > 4:
+        output_dtype = inputs[4].val
+        out_np_dtype = NUM_TO_NUMPY_DTYPE[output_dtype]
+        _logger.warning(
+            f"Core ML ignores output_dtype {out_np_dtype} on torchao.dequantize_affine and instead uses the native precision."
+        )
+
+    output = _utils._construct_constexpr_lut_op(
+        codes.astype(np.int8),
+        codebook,
+        name=node.name,
+    )
+    context.add(output, node.name)
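
The asserts above pin down a shape contract between codes and codebook: codes is rank 2, codebook is rank 4 with two extra trailing dims, and the code columns split evenly across the lookup tables. Restated with a toy tensor pair (sizes hypothetical, dtypes simplified):

# Toy instance of the dequantize_codebook shape contract asserted above.
import numpy as np

nbits = 2
codes = np.zeros((8, 4), dtype=np.int8)   # rank 2: [rows, cols]
codebook = np.zeros((1, 2, 2**nbits, 1))  # rank 4 = codes rank + 2

n_luts = codebook.shape[1]
assert codebook.shape[0] == 1         # grouped_channel granularity only
assert codes.shape[1] % n_luts == 0   # 4 columns split across 2 LUTs
assert codebook.shape[2] == 2**nbits  # one entry per representable code
assert codebook.shape[3] == 1         # scalar lookup values only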
backends/apple/coreml/recipes/__init__.py (new file)
Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from executorch.export import recipe_registry
+
+from .coreml_recipe_provider import CoreMLRecipeProvider
+from .coreml_recipe_types import CoreMLRecipeType
+
+# Auto-register CoreML backend recipe provider
+recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+
+__all__ = [
+    "CoreMLRecipeProvider",
+    "CoreMLRecipeType",
+]
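
Because the module registers its provider at import time, downstream code only needs to import the package before asking the registry for CoreML recipes. A hedged usage sketch (the import path is assumed from the repo layout, not verified against the installed package):

# Hypothetical consumer of the auto-registration above. The import itself runs
# recipe_registry.register_backend_recipe_provider(...); nothing else is needed.
from executorch.backends.apple.coreml import recipes

provider = recipes.CoreMLRecipeProvider()  # zero-arg construction, as in the diff
print(type(provider).__name__)   # CoreMLRecipeProvider
print(recipes.CoreMLRecipeType)  # recipe identifiers exposed by the backend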
