
Commit 9152a97

Authored and committed by ssjia
Update on "[ET-VK] Add mechanism to trigger command buffer re-encode only when necessary"
## Context

Dynamic shape models currently require the command buffer to be re-encoded on every inference, which introduces significant overhead. In practice, a command buffer re-encode may not be needed every frame. A re-encode is only needed when:

1. Shader dispatch parameters change; i.e. the new tensor sizes require a completely different compute shader, a new local work group size, or a new work group grid size (i.e. global work group size / local work group size)
2. Push constants containing tensor metadata need to be updated

This diff reduces the overhead of a tensor shape change by detecting when a command buffer re-encode is actually needed.

## Changes

`ComputeGraph`:
* Introduce a `requires_reencode` flag to `ComputeGraph` to indicate when a command buffer re-encode is needed.
* Introduce a `std::set<ValueRef>` tracking which values were updated when propagating tensor sizes
  * An "update" can be one of two things: 1) tensor sizes changed, 2) symint value changed

`DispatchNode`:
* When propagating new tensor sizes, only execute the resize function if any of the values participating in the computation have been updated
* Mark `requires_reencode` if any push constants associated with tensor metadata need to be updated

`DynamicDispatchNode`:
* Only recompute compute shader dispatch params if any of the values participating in the computation have been updated
* Mark `requires_reencode` if 1) a new compute shader is required, 2) the local work group size changed, or 3) the work group grid size changed

Differential Revision: [D79813237](https://our.internmc.facebook.com/intern/diff/D79813237/)

[ghstack-poisoned]
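To make the gating described above concrete, here is a minimal C++ sketch under stated assumptions: the member names (`mark_updated`, `was_updated`, `propagate_sizes`, the `args` list) are hypothetical simplifications and not the actual ET-VK API. It only illustrates the control flow the commit message describes: accumulate updated `ValueRef`s during size propagation, skip resize work for nodes whose inputs did not change, and set `requires_reencode` only when dispatch parameters or metadata push constants actually change.

```cpp
// Hypothetical sketch of the re-encode gating logic; the real ComputeGraph,
// DispatchNode, and ValueRef types in ET-VK have different signatures.
#include <cstdint>
#include <set>
#include <vector>

using ValueRef = int32_t;

struct ComputeGraph {
  bool requires_reencode = false;
  std::set<ValueRef> updated_values;  // values whose sizes or symint changed

  void mark_updated(ValueRef v) { updated_values.insert(v); }
  bool was_updated(ValueRef v) const { return updated_values.count(v) > 0; }
};

struct DispatchNode {
  std::vector<ValueRef> args;  // values participating in this dispatch

  void propagate_sizes(ComputeGraph& graph) {
    // Skip the resize function entirely if no participating value was updated.
    bool any_updated = false;
    for (ValueRef v : args) {
      any_updated = any_updated || graph.was_updated(v);
    }
    if (!any_updated) {
      return;
    }

    // ... run the node's resize function here ...

    // If tensor-metadata push constants (or, for DynamicDispatchNode, the
    // chosen shader / work group sizing) changed, the recorded command
    // buffer is stale and must be re-encoded before the next submit.
    bool push_constants_changed = true;  // placeholder for the real comparison
    if (push_constants_changed) {
      graph.requires_reencode = true;
    }
  }
};

// After propagating sizes through all nodes, the runtime re-encodes the
// command buffer only if some node set the flag:
//   if (graph.requires_reencode) { /* re-encode */ }
```

Compared with unconditionally re-encoding every inference, this keeps the common case (sizes unchanged, or changes that do not affect dispatch parameters) on a fast path that only updates metadata.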
2 parents 1e1df9e + 11be0f6 commit 9152a97

File tree

146 files changed: +5307, -1199 lines

Note: large commits have some content hidden by default, so not every changed file is shown below.


.ci/scripts/test_model.sh

Lines changed: 0 additions & 1 deletion
@@ -201,7 +201,6 @@ test_model_with_qnn() {
     EXPORT_SCRIPT=bert
   elif [[ "${MODEL_NAME}" == "conv_former" ]]; then
     EXPORT_SCRIPT=conv_former
-    EXTRA_FLAGS="--dataset imagenet-mini/val"
   elif [[ "${MODEL_NAME}" == "cvt" ]]; then
     EXPORT_SCRIPT=cvt
   elif [[ "${MODEL_NAME}" == "distilbert" ]]; then

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 3 additions & 3 deletions
@@ -33,12 +33,12 @@ echo "Creating tokenizer.bin"
 $PYTHON_EXECUTABLE -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin

 set +e
-# Compile only as weight sharing is not applicable on x86
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --llama_artifacts . --compile_only
+# Compile only as weight sharing is not applicable on x86.
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir ./stories_110m_pte_size --llama_artifacts . --compile_only
 exit_code1=$?

 # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
-$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir . --llama_artifacts . --enable_x86_64
+$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleLLMScript.test_llama_stories_110m --model SM8650 --build_folder build-x86/ --executorch_root . --artifact_dir ./stories_110m_accuracy --llama_artifacts . --enable_x86_64
 exit_code2=$?

 # Check BC
Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
+name: Add Open External Contributor PRs and Issues to PyTorch Org Project 136
+
+on:
+  schedule:
+    - cron: '0 * * * *'
+  workflow_dispatch:
+
+jobs:
+  add_to_project:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
+        uses: actions/github-script@v7
+        with:
+          github-token: ${{ secrets.PYTORCH_PROJECT_PAT }}
+          script: |
+            const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
+            const owner = 'pytorch';
+            const repo = 'executorch';
+
+            // List of authors to exclude
+            const excludedAuthors = new Set([
+              "nil-is-all", "cbilgin", "KimishPatel", "psiddh", "digantdesai", "SS-JIA", "ahmtox", "mcr229", "shoumikhin",
+              "manuelcandales", "metascroy", "cccclai", "rohansjoshi", "kirklandsign", "abhinaykukkadapu", "JacobSzwejbka",
+              "Conarnar", "lucylq", "larryliu0820", "BujSet", "Gasoonjia", "Juntian777", "guangy10", "jackzhxng",
+              "GregoryComer", "leafs1", "swolchok", "mergennachin", "tarun292", "byjlw", "jathu", "Jack-Khuu", "georgehong",
+              "zhenyan-zhang-meta", "silverguo", "dbort", "jorgep31415", "huydhn", "mcremon-meta", "trivedivivek", "angelayi",
+              "helunwencser", "hsharma35", "zhxchen17", "iseeyuan", "svekars", "nathanaelsee", "dulinriley", "jerryzh168",
+              "cmodi-meta", "bigfootjon", "sxu", "ydwu4", "Riandy", "tugsbayasgalan", "bsoyluoglu", "yangw-dev", "YIWENX14",
+              "namanahuja", "yushangdi", "limintang", "pianpwk", "viveknayakatmeta", "andreanicastro", "JakeStevens",
+              "gmagogsfm", "zonglinpeng", "eigen-k", "derekxu", "salilsdesai", "skrtskrtfb", "pssrawat", "r-barnes", "pytorchbot",
+              "pytorchmergebot", "pytorchupdatebot", "facebook-github-bot", "Erik-Lundell", "zingo", "AdrianLundell",
+              "oscarandersson8218", "per", "Sebastian-Larsson", "SaoirseARM", "robell", "mansnils", "martinlsm", "freddan80",
+              "YufengShi-dudu", "tom-arm", "perheld", "Jerry-Ge", "gggekov", "fumchin", "wwwind", "haowhsu-quic", "shewu-quic",
+              "winskuo-quic", "chunit-quic", "DannyYuyang-quic", "chuntl", "cymbalrush", "DenisVieriu97", "billmguo",
+              "StrycekSimon", "jirioc", "robert-kalmar", "skywall", "neuropilot-captain"
+            ]);
+
+            async function addItem(contentId, type, number) {
+              try {
+                await github.graphql(`
+                  mutation {
+                    addProjectV2ItemById(input: {projectId: "${projectId}", contentId: "${contentId}"}) {
+                      item { id }
+                    }
+                  }
+                `);
+                console.log(`Added ${type} #${number} to project`);
+              } catch (error) {
+                if (error.message && error.message.includes("A project item already exists for this content")) {
+                  // Ignore if already exists
+                  console.log(`${type} #${number} already in project`);
+                } else {
+                  console.log(`Error adding ${type} #${number}: ${error.message}`);
+                }
+              }
+            }
+
+            try {
+              // Add open issues (not PRs) and exclude by author
+              const issues = await github.paginate(
+                github.rest.issues.listForRepo,
+                {
+                  owner,
+                  repo,
+                  state: 'open',
+                  filter: 'all'
+                }
+              );
+              for (const issue of issues) {
+                if (!issue.pull_request && !excludedAuthors.has(issue.user.login)) {
+                  await addItem(issue.node_id, 'issue', issue.number);
+                }
+              }
+
+              // Add open, non-draft PRs (regardless of review state), exclude by author
+              const prs = await github.paginate(
+                github.rest.pulls.list,
+                {
+                  owner,
+                  repo,
+                  state: 'open',
+                  draft: false,
+                }
+              );
+              for (const pr of prs) {
+                if (!excludedAuthors.has(pr.user.login)) {
+                  await addItem(pr.node_id, 'pr', pr.number);
+                }
+              }
+            } catch (error) {
+              core.setFailed(`Workflow failed: ${error.message}`);
+            }

.github/workflows/pull.yml

Lines changed: 1 addition & 1 deletion
@@ -315,7 +315,7 @@ jobs:
       bash examples/models/moshi/mimi/install_requirements.sh

       # reinstall executorch
-      bash ./install_executorch.sh
+      bash ./install_executorch.sh --minimal

       # run python unittest
       python -m unittest examples.models.moshi.mimi.test_mimi

.github/workflows/trunk.yml

Lines changed: 29 additions & 9 deletions
@@ -60,7 +60,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     strategy:
       matrix:
-        model: [add]
+        model: [add, softmax, mv2]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -72,6 +72,16 @@ jobs:
       MODEL_NAME=${{ matrix.model }}
       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
       conda activate "${CONDA_ENV}"
+      if [[ ${{ matrix.model}} == "add" ]]; then
+        SIM_LIMIT_SEC=60
+      elif [[ ${{ matrix.model}} == "softmax" ]]; then
+        SIM_LIMIT_SEC=60
+      elif [[ ${{ matrix.model}} == "mv2" ]]; then
+        SIM_LIMIT_SEC=5000
+      else
+        echo "Failed unsupported model selection ${{ matrix.model }}"
+        exit 1
+      fi

       source .ci/scripts/utils.sh
       source .ci/scripts/zephyr-utils.sh
@@ -80,20 +90,23 @@ jobs:
       export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
       export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials

+      # TODO @Bujji: Should see if this can be moved into the docker image itself
       download_arm_zephyr_sdk
       ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
-
       cd $ZEPHYR_PROJ_ROOT
       setup_zephyr_et_module

+      # Run setup scripts for Arm FVP and Arm AOT Compilation
       cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
       install_executorch "--use-pt-pinned-commit"
       .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
       source examples/arm/ethos-u-scratch/setup_path.sh
       source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh

       # Get the model as PTE
-      python -m examples.arm.aot_arm_compiler --model_name="${MODEL_NAME}" --output="${MODEL_NAME}.pte"
+      python -m examples.arm.aot_arm_compiler \
+        --model_name="${MODEL_NAME}" \
+        --output="${MODEL_NAME}.pte"

       # Generate the C-style header
       cd $ARM_FVP_TUTORIALS_ROOT
@@ -105,7 +118,8 @@ jobs:
       cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/

       # Build the zephyr elf
-      west build -p always -b mps3/corstone300/fvp
+      west build -p always -b mps3/corstone300/fvp -- \
+        -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte

       # Run the simulation
       FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
@@ -114,23 +128,29 @@ jobs:
         -C mps3_board.uart0.out_file='sim.out' \
         -C cpu0.CFGITCMSZ=15 \
         -C cpu0.CFGDTCMSZ=15 \
-        --simlimit 120
+        --simlimit ${SIM_LIMIT_SEC}

+      # Disable exit on error
+      set +e
       # Report failure if any of the ouptut verification checks fail
       grep -qF "ERROR" sim.out
       exit_status=$? #store 0 if found (failure), 1 if not (success)
       if [[ "$exit_status" -eq "0" ]]; then
-          cat sim.out
-          exit 1
+        cat sim.out
+        set -e
+        exit 1
       fi

       # Report fail if simulation does not complete successfully
       grep -qF "SUCCESS: Program complete, exiting." sim.out
       exit_status=$? #store 0 if found (success), 1 if not (failure)
       if [[ "$exit_status" -eq "1" ]]; then
-          cat sim.out
-          exit 1
+        cat sim.out
+        set -e
+        exit 1
       fi
+      # Re-enable exit on error
+      set -e

   test-models-linux-aarch64:
     name: test-models-linux-aarch64
backends/apple/coreml/TARGETS

Lines changed: 21 additions & 0 deletions
@@ -60,6 +60,26 @@ runtime.python_library(
     ],
 )

+runtime.python_library(
+    name = "recipes",
+    srcs = glob([
+        "recipes/*.py",
+    ]),
+    visibility = [
+        "@EXECUTORCH_CLIENTS",
+    ],
+    deps = [
+        "fbsource//third-party/pypi/coremltools:coremltools",
+        ":backend",
+        "//caffe2:torch",
+        "//executorch/exir:lib",
+        "//executorch/exir/backend:compile_spec_schema",
+        "//executorch/exir/backend:partitioner",
+        "//executorch/exir/backend:utils",
+        "//executorch/export:lib",
+    ],
+)
+
 runtime.cxx_python_extension(
     name = "executorchcoreml",
     srcs = [
@@ -103,6 +123,7 @@ runtime.python_test(
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",
+        ":recipes",
         "//caffe2:torch",
         "//pytorch/vision:torchvision",
     ],
Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+# Copyright © 2025 Apple Inc. All rights reserved.
+#
+# Please refer to the license found in the LICENSE file in the root directory of the source tree.
+
+
+from executorch.export import recipe_registry
+
+from .coreml_recipe_provider import CoreMLRecipeProvider
+from .coreml_recipe_types import CoreMLRecipeType
+
+# Auto-register CoreML backend recipe provider
+recipe_registry.register_backend_recipe_provider(CoreMLRecipeProvider())
+
+__all__ = [
+    "CoreMLRecipeProvider",
+    "CoreMLRecipeType",
+]
