Skip to content

Commit 3a6b545

Browse files
committed
Update on " [ExecuTorch][BE] Split kv cache and SDPA for better code sharing"
Summary: Why? We have coupled SDPA with kv cache for a while. Initially this was done as we implemented the sdpa_with_kv_cache custom op to reduce multiple copy overheads from kv cache update. (This could have been done by having a separate custom kv cache update op and a custom sdpa op. Recent changes enabled this.) As a result of the SDPA module owning the kv cache, we get a) a non-composable implementation and b) model definitions and components that are harder to reuse from repos like tune. The outcome of this is that we have multiple definitions of the same model, llama, lying around in ET, TorchChat and Tune. This diff and subsequent ones will try to move in the direction where custom kv cache and custom sdpa become decoupled and composable, making it more module-swap friendly with tune's model definition. How? Earlier PRs decoupled kv cache update from sdpa. So now 1. Decouple SDPA nn.Module from KV cache. 2. Standardize on the KVCache and SDPA interface. That is, KVCache and SDPA both operate on q, k, v in [B, # heads, seq_len, head_dim] formatted tensors. 3. Step 2 will introduce multiple transposes when KVCache and SDPA are replaced by custom modules, but we will write a graph pass to undo those. Test Plan: Existing tests. Make sure perf doesn't regress. Differential Revision: [D67914054](https://our.internmc.facebook.com/intern/diff/D67914054) [ghstack-poisoned]
2 parents d20dd95 + df1383b commit 3a6b545

File tree

207 files changed

+5800
-2245
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

207 files changed

+5800
-2245
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2024-05-15
1+
2024-12-16
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2ea4b56ec872424e486c4fe2d55da061067a2ed3
1+
0a94bb432ed75cc2d950d81b2921363218a7e459

.ci/docker/conda-env-ci.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
cmake=3.22.1
22
ninja=1.10.2
3+
libuv
4+
pkg-config
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
2+
# Copyright 2024 Arm Limited and/or its affiliates.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
# NB: This script could be used to install Arm dependencies
8+
# Setup arm example environment (including TOSA tools)
9+
git config --global user.email "[email protected]"
10+
git config --global user.name "Github Executorch"
11+
bash examples/arm/setup.sh --i-agree-to-the-contained-eula

.ci/scripts/setup-macos.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,5 +131,9 @@ if [[ -z "${GITHUB_RUNNER:-}" ]]; then
131131
fi
132132

133133
print_cmake_info
134-
install_executorch
134+
install_pytorch_and_domains
135+
# We build PyTorch from source here instead of using nightly. This allows CI to test against
136+
# the pinned commit from PyTorch
137+
install_executorch "use-pt-pinned-commit"
135138
build_executorch_runner "${BUILD_TOOL}"
139+
do_not_use_nightly_on_ci

.ci/scripts/utils.sh

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,42 @@ install_pip_dependencies() {
4040
popd || return
4141
}
4242

43+
install_domains() {
44+
echo "Install torchvision and torchaudio"
45+
pip install --no-use-pep517 --user "git+https://github.com/pytorch/audio.git@${TORCHAUDIO_VERSION}"
46+
pip install --no-use-pep517 --user "git+https://github.com/pytorch/vision.git@${TORCHVISION_VERSION}"
47+
}
48+
49+
install_pytorch_and_domains() {
50+
pushd .ci/docker || return
51+
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
52+
popd || return
53+
54+
git clone https://github.com/pytorch/pytorch.git
55+
56+
# Fetch the target commit
57+
pushd pytorch || return
58+
git checkout "${TORCH_VERSION}"
59+
git submodule update --init --recursive
60+
61+
export USE_DISTRIBUTED=1
62+
# Then build and install PyTorch
63+
python setup.py bdist_wheel
64+
pip install "$(echo dist/*.whl)"
65+
66+
# Grab the pinned audio and vision commits from PyTorch
67+
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
68+
export TORCHAUDIO_VERSION
69+
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
70+
export TORCHVISION_VERSION
71+
72+
install_domains
73+
74+
popd || return
75+
# Print sccache stats for debugging
76+
sccache --show-stats || true
77+
}
78+
4379
install_flatc_from_source() {
4480
# NB: This function could be used to install flatbuffer from source
4581
pushd third-party/flatbuffers || return
@@ -59,17 +95,6 @@ install_flatc_from_source() {
5995
popd || return
6096
}
6197

62-
install_arm() {
63-
# NB: This function could be used to install Arm dependencies
64-
# Setup arm example environment (including TOSA tools)
65-
git config --global user.email "[email protected]"
66-
git config --global user.name "Github Executorch"
67-
bash examples/arm/setup.sh --i-agree-to-the-contained-eula
68-
69-
# Test tosa_reference flow
70-
source examples/arm/ethos-u-scratch/setup_path.sh
71-
}
72-
7398
build_executorch_runner_buck2() {
7499
# Build executorch runtime with retry as this step is flaky on macos CI
75100
retry buck2 build //examples/portable/executor_runner:executor_runner

.github/workflows/apple-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,7 @@ jobs:
410410
runs-on: linux.2xlarge
411411
steps:
412412
- name: Download the apps from GitHub
413-
uses: actions/download-artifact@v3
413+
uses: actions/download-artifact@v4
414414
with:
415415
# The name here needs to match the name of the upload-artifact parameter
416416
name: ios-apps

.github/workflows/apple.yml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ jobs:
5353
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
5454
timeout: 90
5555
secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_DEMO_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
56-
upload-artifact: ios-apps
56+
upload-artifact: ios-demo-app
5757
script: |
5858
set -eux
5959
@@ -83,10 +83,10 @@ jobs:
8383
runs-on: linux.2xlarge
8484
steps:
8585
- name: Download the artifacts from GitHub
86-
uses: actions/download-artifact@v3
86+
uses: actions/download-artifact@v4
8787
with:
8888
# The name here needs to match the name of the upload-artifact parameter
89-
name: ios-apps
89+
name: ios-demo-app
9090
path: ${{ runner.temp }}/artifacts/
9191

9292
- name: Verify the artifacts
@@ -216,7 +216,7 @@ jobs:
216216
role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-ios
217217
aws-region: us-east-1
218218
- name: Download the artifact
219-
uses: actions/download-artifact@v3
219+
uses: actions/download-artifact@v4
220220
with:
221221
# NB: The name here needs to match the upload-artifact name from build-frameworks-ios job
222222
name: executorch-frameworks-ios
@@ -291,7 +291,7 @@ jobs:
291291
python-version: '3.11'
292292
submodules: 'true'
293293
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
294-
upload-artifact: ios-apps
294+
upload-artifact: ios-benchmark-app
295295
secrets-env: BUILD_CERTIFICATE_BASE64 EXECUTORCH_BENCHMARK_BUILD_PROVISION_PROFILE_BASE64 KEYCHAIN_PASSWORD
296296
timeout: 90
297297
script: |

.github/workflows/pull.yml

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -354,13 +354,11 @@ jobs:
354354
EXECUTORCH_BUILD_ARM_BAREMETAL=ON \
355355
.ci/scripts/setup-linux.sh "${BUILD_TOOL}"
356356
357-
source .ci/scripts/utils.sh
358357
# Install Arm dependencies
359-
install_arm
360-
361-
# Run pytest with coverage
362-
pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
358+
.ci/scripts/setup-arm-baremetal-tools.sh
363359
360+
# Run pytest without simulator
361+
backends/arm/test/test_arm_baremetal.sh test_pytest
364362
365363
test-llama-runner-qnn-linux:
366364
name: test-llama-runner-qnn-linux

.github/workflows/trunk.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -146,14 +146,15 @@ jobs:
146146
source .ci/scripts/utils.sh
147147
install_executorch
148148
149-
install_arm
149+
.ci/scripts/setup-arm-baremetal-tools.sh
150150
151151
# Increase number of files user can monitor to bypass buck failures.
152152
# Hopefully this is high enough for this setup.
153153
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
154154
155155
# Test ethos-u delegate examples with run.sh
156-
PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/
156+
backends/arm/test/test_arm_baremetal.sh test_run_ethosu_fvp
157+
157158
158159
test-arm-reference-delegation:
159160
name: test-arm-reference-delegation
@@ -172,10 +173,10 @@ jobs:
172173
source .ci/scripts/utils.sh
173174
install_executorch
174175
175-
install_arm
176+
.ci/scripts/setup-arm-baremetal-tools.sh
176177
177-
# Run arm unit tests
178-
pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
178+
# Run arm unit tests using the simulator
179+
backends/arm/test/test_arm_baremetal.sh test_pytest_ethosu_fvp
179180
180181
test-coreml-delegate:
181182
name: test-coreml-delegate

0 commit comments

Comments
 (0)