Skip to content

Commit 7baa27d

Browse files
committed
Update on "add attention_sink.py"
This PR adds `KVCacheWithAttentionSink`, which is required for `AttentionSink`. It keeps the first `sink_size` tokens as attention sinks and maintains a sliding window of size `window_size` for new tokens. Note: I am trying to implement and verify `AttentionSink` in eager mode first, so the current implementation may still have some lowering errors or performance issues. For example, it does not support the case where dynamic shape is disabled. These problems will be resolved when we are ready to deploy `AttentionSink` to edge. Differential Revision: [D65235798](https://our.internmc.facebook.com/intern/diff/D65235798/) [ghstack-poisoned]
2 parents 7140dec + 763dde2 commit 7baa27d

File tree

240 files changed

+10323
-2210
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

240 files changed

+10323
-2210
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
bd5482c7c3e1197e10c46ff739027f917d9c1fcc
1+
c8a648d4dffb9f0133ff4a2ea0e660b42105d3ad

.ci/docker/common/install_clang.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ install_ubuntu() {
1313
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION"
1414
apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"
1515
# Also require LLD linker from llvm and libomp to build PyTorch from source
16-
apt-get install -y lld "libomp-${CLANG_VERSION}-dev"
16+
apt-get install -y lld "libomp-${CLANG_VERSION}-dev" "libc++-${CLANG_VERSION}-dev"
1717

1818
# Use update-alternatives to make this version the default
1919
update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50

.ci/docker/requirements-ci.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
mpmath==1.3.0
2-
numpy==1.22.0; python_version == '3.10'
2+
numpy==1.21.3; python_version == '3.10'
33
numpy==1.23.2; python_version == '3.11'
44
numpy; python_version >= '3.12'
55
PyYAML==6.0.1

.ci/scripts/setup-linux.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ else
1919
fi
2020

2121
# As Linux job is running inside a Docker container, all of its dependencies
22-
# have already been installed
23-
install_executorch
22+
# have already been installed, so we use a PyTorch build from source here instead
23+
# of nightly. This allows CI to test against latest commits from PyTorch
24+
install_executorch "use-pt-pinned-commit"
2425
build_executorch_runner "${BUILD_TOOL}"

.ci/scripts/setup-qnn-deps.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,9 @@ install_qnn() {
3131
}
3232

3333
setup_libc++() {
34+
clang_version=$1
3435
sudo apt-get update
35-
pkgs_to_check=('libc++-dev')
36+
pkgs_to_check=("libc++-${clang_version}-dev")
3637
j=0
3738
while [ $j -lt ${#pkgs_to_check[*]} ]; do
3839
install_status=$(verify_pkg_installed ${pkgs_to_check[$j]})
@@ -47,5 +48,6 @@ setup_libc++() {
4748
done
4849
}
4950

50-
setup_libc++
51+
# This needs to match the clang version from the Docker image
52+
setup_libc++ 12
5153
install_qnn

.ci/scripts/utils.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,11 @@ install_executorch() {
2020
which pip
2121
# Install executorch, this assumes that Executorch is checked out in the
2222
# current directory.
23-
# TODO(T199538337): clean up install scripts to use install_requirements.sh
24-
./install_requirements.sh --pybind xnnpack
23+
if [[ "${1:-}" == "use-pt-pinned-commit" ]]; then
24+
./install_requirements.sh --pybind xnnpack --use-pt-pinned-commit
25+
else
26+
./install_requirements.sh --pybind xnnpack
27+
fi
2528
# Just print out the list of packages for debugging
2629
pip list
2730
}

.github/pull_request_template.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
### Summary
2+
[PLEASE REMOVE] See [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests) for ExecuTorch PR guidelines.
3+
4+
[PLEASE REMOVE] If this PR closes an issue, please add a `Fixes #<issue-id>` line.
5+
6+
[PLEASE REMOVE] If this PR introduces a fix or feature that should be in the upcoming release notes, please add a "Release notes: <area>" label. For a list of available release notes labels, check out [CONTRIBUTING.md's Pull Requests](https://github.com/pytorch/executorch/blob/main/CONTRIBUTING.md#pull-requests).
7+
8+
### Test plan
9+
[PLEASE REMOVE] How did you test this PR? Please write down any manual commands you used and note down tests that you have written if applicable.

.github/workflows/_android.yml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ jobs:
6666
# avoid permission issue
6767
sudo chown -R "${USER}" /opt/android
6868
69+
- name: Download Artifacts
70+
shell: bash
71+
run: |
72+
set -eux
73+
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
74+
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
75+
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
76+
unzip model.zip
77+
mv *.pte model.pte
78+
6979
- name: Gradle cache
7080
uses: gradle/actions/setup-gradle@v3
7181

.github/workflows/_unittest.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,9 @@ jobs:
3737
CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
3838
.ci/scripts/setup-linux.sh cmake
3939
40+
# Install llama3_2_vision dependencies.
41+
PYTHON_EXECUTABLE=python ./examples/models/llama3_2_vision/install_requirements.sh
42+
4043
# Run pytest with coverage
4144
pytest -n auto --cov=./ --cov-report=xml
4245
# Run gtest
@@ -67,6 +70,10 @@ jobs:
6770
${CONDA_RUN} --no-capture-output \
6871
.ci/scripts/setup-macos.sh cmake
6972
73+
# Install llama3_2_vision dependencies.
74+
PYTHON_EXECUTABLE=python ${CONDA_RUN} --no-capture-output \
75+
./examples/models/llama3_2_vision/install_requirements.sh
76+
7077
# Run pytest with coverage
7178
${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
7279
# Run gtest

.github/workflows/pull.yml

Lines changed: 17 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,6 @@ jobs:
3535
name: test-setup-linux-gcc
3636
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
3737
strategy:
38-
matrix:
39-
include:
40-
- build-tool: cmake
4138
fail-fast: false
4239
with:
4340
runner: linux.2xlarge
@@ -50,7 +47,7 @@ jobs:
5047
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
5148
conda activate "${CONDA_ENV}"
5249
53-
BUILD_TOOL=${{ matrix.build-tool }}
50+
BUILD_TOOL="cmake"
5451
5552
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
5653
# Build and test ExecuTorch with the add model on portable backend.
@@ -89,20 +86,11 @@ jobs:
8986
strategy:
9087
matrix:
9188
dtype: [fp32]
92-
build-tool: [buck2, cmake]
9389
mode: [portable, xnnpack+custom, xnnpack+custom+qe]
9490
include:
9591
- dtype: bf16
96-
build-tool: cmake
97-
mode: portable
98-
- dtype: bf16
99-
build-tool: buck2
10092
mode: portable
10193
- dtype: bf16
102-
build-tool: cmake
103-
mode: custom
104-
- dtype: bf16
105-
build-tool: buck2
10694
mode: custom
10795
fail-fast: false
10896
with:
@@ -111,29 +99,30 @@ jobs:
11199
submodules: 'true'
112100
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
113101
timeout: 900
102+
upload-artifact: android-models
103+
upload-artifact-to-s3: true
114104
script: |
115105
# The generic Linux job chooses to use base env, not the one setup by the image
116106
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
117107
conda activate "${CONDA_ENV}"
118108
119109
DTYPE=${{ matrix.dtype }}
120-
BUILD_TOOL=${{ matrix.build-tool }}
110+
BUILD_TOOL="cmake"
121111
MODE=${{ matrix.mode }}
112+
ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
113+
ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
122114
123115
# Setup executorch
124-
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
116+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
125117
# Install requirements for export_llama
126118
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
127119
# Test llama2
128-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}"
120+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh stories110M "${BUILD_TOOL}" "${DTYPE}" "${MODE}" "${ARTIFACTS_DIR_NAME}"
129121
130122
test-llama-runner-linux-android:
131123
name: test-llama-runner-linux-android
132124
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
133125
strategy:
134-
matrix:
135-
include:
136-
- build-tool: cmake
137126
fail-fast: false
138127
with:
139128
runner: linux.2xlarge
@@ -146,18 +135,14 @@ jobs:
146135
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
147136
conda activate "${CONDA_ENV}"
148137
149-
BUILD_TOOL=${{ matrix.build-tool }}
138+
BUILD_TOOL="cmake"
150139
PYTHON_EXECUTABLE=python \
151140
bash .ci/scripts/build_llama_android.sh "${BUILD_TOOL}"
152141
153142
test-custom-ops-linux:
154143
name: test-custom-ops-linux
155144
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
156145
strategy:
157-
matrix:
158-
include:
159-
- build-tool: buck2
160-
- build-tool: cmake
161146
fail-fast: false
162147
with:
163148
runner: linux.2xlarge
@@ -170,7 +155,7 @@ jobs:
170155
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
171156
conda activate "${CONDA_ENV}"
172157
173-
BUILD_TOOL=${{ matrix.build-tool }}
158+
BUILD_TOOL="cmake"
174159
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
175160
# Test custom ops
176161
PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
@@ -179,10 +164,6 @@ jobs:
179164
name: test-selective-build-linux
180165
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
181166
strategy:
182-
matrix:
183-
include:
184-
- build-tool: buck2
185-
- build-tool: cmake
186167
fail-fast: false
187168
with:
188169
runner: linux.2xlarge
@@ -195,7 +176,7 @@ jobs:
195176
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
196177
conda activate "${CONDA_ENV}"
197178
198-
BUILD_TOOL=${{ matrix.build-tool }}
179+
BUILD_TOOL="cmake"
199180
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
200181
# Test selective build
201182
PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
@@ -235,9 +216,6 @@ jobs:
235216
name: test-quantized-aot-lib-linux
236217
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
237218
strategy:
238-
matrix:
239-
include:
240-
- build-tool: cmake
241219
fail-fast: false
242220
with:
243221
runner: linux.2xlarge
@@ -250,17 +228,14 @@ jobs:
250228
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
251229
conda activate "${CONDA_ENV}"
252230
253-
BUILD_TOOL=${{ matrix.build-tool }}
231+
BUILD_TOOL="cmake"
254232
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
255233
PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
256234
257235
test-pybind-build-linux:
258236
name: test-pybind-build-linux
259237
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
260238
strategy:
261-
matrix:
262-
include:
263-
- build-tool: cmake
264239
fail-fast: false
265240
with:
266241
runner: linux.2xlarge
@@ -274,7 +249,7 @@ jobs:
274249
conda activate "${CONDA_ENV}"
275250
276251
# build module for executorch.extension.pybindings.portable_lib
277-
BUILD_TOOL=${{ matrix.build-tool }}
252+
BUILD_TOOL="cmake"
278253
PYTHON_EXECUTABLE=python \
279254
EXECUTORCH_BUILD_XNNPACK=ON \
280255
EXECUTORCH_BUILD_PYBIND=ON \
@@ -349,6 +324,7 @@ jobs:
349324
350325
android:
351326
uses: ./.github/workflows/_android.yml
327+
needs: test-llama-runner-linux
352328

353329
unittest:
354330
uses: ./.github/workflows/_unittest.yml
@@ -357,10 +333,6 @@ jobs:
357333

358334
unittest-arm:
359335
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
360-
strategy:
361-
matrix:
362-
include:
363-
- build-tool: buck2
364336
with:
365337
runner: linux.2xlarge
366338
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -374,7 +346,7 @@ jobs:
374346
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
375347
conda activate "${CONDA_ENV}"
376348
377-
BUILD_TOOL=${{ matrix.build-tool }}
349+
BUILD_TOOL="cmake"
378350
379351
# Setup MacOS dependencies as there is no Docker support on MacOS atm
380352
PYTHON_EXECUTABLE=python \
@@ -396,7 +368,6 @@ jobs:
396368
strategy:
397369
matrix:
398370
dtype: [fp32]
399-
build-tool: [cmake]
400371
mode: [qnn]
401372
fail-fast: false
402373
with:
@@ -411,14 +382,14 @@ jobs:
411382
conda activate "${CONDA_ENV}"
412383
413384
DTYPE=${{ matrix.dtype }}
414-
BUILD_TOOL=${{ matrix.build-tool }}
385+
BUILD_TOOL="cmake"
415386
MODE=${{ matrix.mode }}
416387
417388
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
418389
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
419390
420391
# Setup executorch
421-
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh buck2
392+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
422393
# Install requirements for export_llama
423394
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
424395
# Test llama2

0 commit comments

Comments
 (0)