Skip to content

Commit e6d16be

Browse files
committed
Update on "Transform model to be able to use Attention Sink"
This PR adds necessary functions for transforming the model to be able to use Attention Sink. Differential Revision: [D65571289](https://our.internmc.facebook.com/intern/diff/D65571289/) [ghstack-poisoned]
2 parents 1c0c17c + 552f624 commit e6d16be

File tree

362 files changed

+839433
-3324
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

362 files changed

+839433
-3324
lines changed

.ci/docker/build.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ case "${IMAGE_NAME}" in
3737
ARM_SDK=yes
3838
CLANG_VERSION=12
3939
;;
40+
executorch-ubuntu-22.04-qnn-sdk)
41+
QNN_SDK=yes
42+
CLANG_VERSION=12
43+
;;
4044
executorch-ubuntu-22.04-clang12-android)
4145
LINTRUNNER=""
4246
CLANG_VERSION=12
@@ -72,6 +76,7 @@ docker build \
7276
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
7377
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
7478
--build-arg "ARM_SDK=${ARM_SDK:-}" \
79+
--build-arg "QNN_SDK=${QNN_SDK:-}" \
7580
--build-arg "ANDROID_NDK_VERSION=${ANDROID_NDK_VERSION:-}" \
7681
-f "${OS}"/Dockerfile \
7782
"$@" \
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
c8a648d4dffb9f0133ff4a2ea0e660b42105d3ad
1+
19eff28ff3f19b50da46f5a9ff5f4d4d213806fe

.ci/docker/ci_commit_pins/torchao.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

.ci/docker/ubuntu/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,7 @@ COPY --chown=ci-user:ci-user ./arm /opt/arm
8282
# Set up ARM SDK if needed
8383
RUN if [ -n "${ARM_SDK}" ]; then git config --global user.email "[email protected]"; git config --global user.name "OSS CI"; bash /opt/arm/setup.sh --i-agree-to-the-contained-eula /opt/arm-sdk; chown -R ci-user:ci-user /opt/arm-sdk; fi
8484

85+
ARG QNN_SDK
86+
8587
USER ci-user
8688
CMD ["bash"]

.ci/scripts/gather_test_models.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
"ic4": "linux.12xlarge",
2525
"resnet50": "linux.12xlarge",
2626
"llava": "linux.12xlarge",
27+
"llama3_2_vision_encoder": "linux.12xlarge",
28+
"llama3_2_text_decoder": "linux.12xlarge",
2729
# This one causes timeout on smaller runner, the root cause is unclear (T161064121)
2830
"dl3": "linux.12xlarge",
2931
"emformer_join": "linux.12xlarge",
@@ -88,8 +90,8 @@ def model_should_run_on_event(model: str, event: str) -> bool:
8890
if event == "pull_request":
8991
return model in ["mv3", "vit"]
9092
elif event == "push":
91-
# 'emformer_predict' is running super slow. Only run it periodically
92-
return model not in ["emformer_predict"]
93+
# These are super slow. Only run it periodically
94+
return model not in ["dl3", "edsr", "emformer_predict"]
9395
else:
9496
return True
9597

.ci/scripts/setup-linux.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ fi
2323
# of nightly. This allows CI to test against latest commits from PyTorch
2424
install_executorch "use-pt-pinned-commit"
2525
build_executorch_runner "${BUILD_TOOL}"
26+
do_not_use_nightly_on_ci

.ci/scripts/test_llama_runner_eager.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,12 @@ run_and_verify() {
4242
-d fp32 \
4343
--max_seq_length 32 \
4444
--temperature 0 \
45+
--show_tokens \
4546
--prompt "Once upon a time," > result.txt
4647

4748
# Verify result.txt
4849
RESULT=$(cat result.txt)
49-
EXPECTED_RESULT="there was a little girl"
50+
EXPECTED_RESULT="727, 471, 263, 2217, 7826, 4257, 365, 2354, 29889, 2296, 18012, 304, 1708, 5377, 297, 278, 6575, 845, 457, 29889, 3118, 2462, 29892, 1183, 4446, 263"
5051
if [[ "${RESULT}" == *"${EXPECTED_RESULT}"* ]]; then
5152
echo "Actual result: ${RESULT}"
5253
echo "Success"

.ci/scripts/test_model.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ test_model() {
7777
# Install requirements for export_llama
7878
bash examples/models/llama/install_requirements.sh
7979
# Test export_llama script: python3 -m examples.models.llama.export_llama
80-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
80+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
8181
run_portable_executor_runner
8282
rm "./${MODEL_NAME}.pte"
8383
fi
@@ -87,6 +87,10 @@ test_model() {
8787
bash examples/models/llava/install_requirements.sh
8888
STRICT="--no-strict"
8989
fi
90+
if [[ "$MODEL_NAME" == "llama3_2_vision_encoder" || "$MODEL_NAME" == "llama3_2_text_decoder" ]]; then
91+
# Install requirements for llama vision.
92+
bash examples/models/llama3_2_vision/install_requirements.sh
93+
fi
9094
# python3 -m examples.portable.scripts.export --model_name="llama2" should works too
9195
"${PYTHON_EXECUTABLE}" -m examples.portable.scripts.export --model_name="${MODEL_NAME}" "${STRICT}"
9296
run_portable_executor_runner

.ci/scripts/utils.sh

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,10 +113,26 @@ cmake_install_executorch_lib() {
113113
}
114114

115115
download_stories_model_artifacts() {
116-
# Download stories110M.pt and tokenizer from Github
116+
# Download stories110M.pt and tokenizer from Github
117117
curl -Ls "https://huggingface.co/karpathy/tinyllamas/resolve/main/stories110M.pt" --output stories110M.pt
118118
curl -Ls "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokenizer.model" --output tokenizer.model
119119
# Create params.json file
120120
touch params.json
121121
echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
122122
}
123+
124+
do_not_use_nightly_on_ci() {
125+
# An assert to make sure that we are not using PyTorch nightly on CI to prevent
126+
# regression as documented in https://github.com/pytorch/executorch/pull/6564
127+
TORCH_VERSION=$(pip list | grep -w 'torch ' | awk -F ' ' {'print $2'} | tr -d '\n')
128+
129+
# The version of PyTorch building from source looks like 2.6.0a0+gitc8a648d that
130+
# includes the commit while nightly (2.6.0.dev20241019+cpu) or release (2.6.0)
131+
# won't have that. Note that we couldn't check for the exact commit from the pin
132+
# ci_commit_pins/pytorch.txt here because the value will be different when running
133+
# this on PyTorch CI
134+
if [[ "${TORCH_VERSION}" != *"+git"* ]]; then
135+
echo "Unexpected torch version. Expected binary built from source, got ${TORCH_VERSION}"
136+
exit 1
137+
fi
138+
}

.github/workflows/_android.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ jobs:
7373
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
7474
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
7575
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
76+
curl -o android-test-debug.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug.apk
77+
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug-androidTest.apk
7678
unzip model.zip
7779
mv *.pte model.pte
7880

0 commit comments

Comments
 (0)