Skip to content

Commit f4f089e

Browse files
committed
Update on "[Executorch][llama] Change runner to decouple prompt length from sequence length"
Following the previous diff, we can now utilize the entire KV cache to generate more tokens than the max prompt length allows. Differential Revision: [D69073908](https://our.internmc.facebook.com/intern/diff/D69073908/) [ghstack-poisoned]
2 parents bcfb3a4 + 10ef2c0 commit f4f089e

File tree

112 files changed

+1568
-611
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+1568
-611
lines changed

.ci/scripts/build_android_instrumentation.sh

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,10 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
1212
fi
1313
which "${PYTHON_EXECUTABLE}"
1414

15-
build_android_test() {
16-
mkdir -p extension/android/executorch_android/src/androidTest/resources
17-
cp extension/module/test/resources/add.pte extension/android/executorch_android/src/androidTest/resources
18-
pushd extension/android
19-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
20-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
21-
popd
22-
}
15+
mkdir -p "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
16+
cp extension/module/test/resources/add.pte "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
2317

24-
collect_artifacts_to_be_uploaded() {
25-
ARTIFACTS_DIR_NAME="$1"
26-
# Collect Java library test
27-
JAVA_LIBRARY_TEST_DIR="${ARTIFACTS_DIR_NAME}/library_test_dir"
28-
mkdir -p "${JAVA_LIBRARY_TEST_DIR}"
29-
cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}"
30-
}
31-
32-
main() {
33-
build_android_test
34-
if [ -n "$ARTIFACTS_DIR_NAME" ]; then
35-
collect_artifacts_to_be_uploaded ${ARTIFACTS_DIR_NAME}
36-
fi
37-
}
38-
39-
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
40-
main "$@"
41-
fi
18+
pushd "${BUILD_AAR_DIR}"
19+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
20+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
21+
popd

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"dl3": "linux.4xlarge.memory",
3434
"emformer_join": "linux.4xlarge.memory",
3535
"emformer_predict": "linux.4xlarge.memory",
36-
"phi-4-mini": "linux.4xlarge.memory",
36+
"phi_4_mini": "linux.4xlarge.memory",
3737
}
3838
}
3939

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ ${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
7878
-qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
7979
--group_size ${QLINEAR_GROUP_SIZE} \
8080
-E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
81-
--disable_dynamic_shape \
8281
-d fp32
8382

8483
# Test run

.ci/scripts/test_model.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ test_model() {
100100
rm "./${MODEL_NAME}.pte"
101101
return # Skip running with portable executor runner since portable doesn't support Qwen's biased linears.
102102
fi
103-
if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
103+
if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then
104104
# Install requirements for export_llama
105105
bash examples/models/llama/install_requirements.sh
106106
# Test export_llama script: python3 -m examples.models.llama.export_llama.
107-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
107+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json
108108
run_portable_executor_runner
109109
rm "./${MODEL_NAME}.pte"
110110
return

.ci/scripts/unittest-macos-buck2.sh

100644100755
File mode changed.

.ci/scripts/unittest-macos.sh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ export TMP_DIR=$(mktemp -d)
1919
export PATH="${TMP_DIR}:$PATH"
2020
trap 'rm -rfv ${TMP_DIR}' EXIT
2121

22-
if [[ "$BUILD_TOOL" == "cmake" ]]; then
23-
# Setup MacOS dependencies as there is no Docker support on MacOS atm
24-
PYTHON_EXECUTABLE=python \
25-
EXECUTORCH_BUILD_PYBIND=ON \
26-
CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
27-
${CONDA_RUN} --no-capture-output \
28-
.ci/scripts/setup-macos.sh "$@"
22+
# Setup MacOS dependencies as there is no Docker support on MacOS atm
23+
PYTHON_EXECUTABLE=python \
24+
EXECUTORCH_BUILD_PYBIND=ON \
25+
CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
26+
${CONDA_RUN} --no-capture-output \
27+
.ci/scripts/setup-macos.sh "$@"
2928

29+
if [[ "$BUILD_TOOL" == "cmake" ]]; then
3030
# Install llama3_2_vision dependencies.
3131
PYTHON_EXECUTABLE=python \
3232
${CONDA_RUN} --no-capture-output \

.ci/scripts/utils.sh

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,46 @@ install_pytorch_and_domains() {
6060
# Fetch the target commit
6161
pushd pytorch || return
6262
git checkout "${TORCH_VERSION}"
63-
git submodule update --init --recursive
6463

65-
export USE_DISTRIBUTED=1
66-
# Then build and install PyTorch
67-
python setup.py bdist_wheel
68-
pip install "$(echo dist/*.whl)"
64+
local system_name=$(uname)
65+
if [[ "${system_name}" == "Darwin" ]]; then
66+
local platform=$(python -c 'import sysconfig; import platform; v=platform.mac_ver()[0].split(".")[0]; platform=sysconfig.get_platform().split("-"); platform[1]=f"{v}_0"; print("_".join(platform))')
67+
fi
68+
local python_version=$(python -c 'import platform; v=platform.python_version_tuple(); print(f"{v[0]}{v[1]}")')
69+
local torch_release=$(cat version.txt)
70+
local torch_short_hash=${TORCH_VERSION:0:7}
71+
local torch_wheel_path="cached_artifacts/pytorch/executorch/pytorch_wheels/${system_name}/${python_version}"
72+
local torch_wheel_name="torch-${torch_release}%2Bgit${torch_short_hash}-cp${python_version}-cp${python_version}-${platform:-}.whl"
73+
74+
local cached_torch_wheel="https://gha-artifacts.s3.us-east-1.amazonaws.com/${torch_wheel_path}/${torch_wheel_name}"
75+
# Caching the PyTorch wheel is only needed on macOS; Linux CI already has it as part
76+
# of the Docker image
77+
local torch_wheel_not_found=0
78+
if [[ "${system_name}" == "Darwin" ]]; then
79+
pip install "${cached_torch_wheel}" || torch_wheel_not_found=1
80+
else
81+
torch_wheel_not_found=1
82+
fi
83+
84+
# Found no such wheel, we will build it from source then
85+
if [[ "${torch_wheel_not_found}" == "1" ]]; then
86+
echo "No cached wheel found, continue with building PyTorch at ${TORCH_VERSION}"
87+
88+
git submodule update --init --recursive
89+
USE_DISTRIBUTED=1 python setup.py bdist_wheel
90+
pip install "$(echo dist/*.whl)"
91+
92+
# Only AWS runners have access to S3
93+
if command -v aws && [[ -z "${GITHUB_RUNNER:-}" ]]; then
94+
for wheel_path in dist/*.whl; do
95+
local wheel_name=$(basename "${wheel_path}")
96+
echo "Caching ${wheel_name}"
97+
aws s3 cp "${wheel_path}" "s3://gha-artifacts/${torch_wheel_path}/${wheel_name}"
98+
done
99+
fi
100+
else
101+
echo "Use cached wheel at ${cached_torch_wheel}"
102+
fi
69103

70104
# Grab the pinned audio and vision commits from PyTorch
71105
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)

.github/workflows/_android.yml

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,21 @@ jobs:
2727
conda activate "${CONDA_ENV}"
2828
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
2929
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
30-
31-
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
32-
bash examples/models/llama/install_requirements.sh
33-
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
30+
mkdir -p ${ARTIFACTS_DIR_NAME}/
3431
3532
# Build LLM Demo for Android
3633
export BUILD_AAR_DIR=aar-out
3734
mkdir -p $BUILD_AAR_DIR
38-
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
39-
bash .ci/scripts/build_android_instrumentation.sh ${ARTIFACTS_DIR_NAME}
35+
bash scripts/build_android_library.sh
36+
cp ${BUILD_AAR_DIR}/executorch.aar $ARTIFACTS_DIR_NAME
37+
38+
mkdir -p ${ARTIFACTS_DIR_NAME}/library_test_dir
39+
bash .ci/scripts/build_android_instrumentation.sh
40+
cp ${BUILD_AAR_DIR}/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"
41+
42+
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
43+
bash examples/models/llama/install_requirements.sh
44+
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
4045
4146
mkdir -p examples/demo-apps/android/LlamaDemo/app/libs
4247
cp aar-out/executorch.aar examples/demo-apps/android/LlamaDemo/app/libs
@@ -96,7 +101,7 @@ jobs:
96101
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
97102
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
98103
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
99-
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug-androidTest.apk
104+
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch_android-debug-androidTest.apk
100105
unzip model.zip
101106
mv *.pte model.pte
102107
@@ -120,7 +125,7 @@ jobs:
120125
with:
121126
api-level: ${{ env.API_LEVEL }}
122127
arch: x86_64
123-
script: ./build/run_android_emulator.sh
128+
script: ./scripts/run_android_emulator.sh
124129
# NB: This is to boot the emulator faster following the instructions on
125130
# https://github.com/ReactiveCircus/android-emulator-runner. The max number
126131
# of cores we can set is 6, any higher number will be reduced to 6.

.github/workflows/_unittest.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,6 @@ jobs:
4949
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
5050
script: |
5151
set -eux
52+
# This is needed to get the prebuilt PyTorch wheel from S3
53+
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
5254
.ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}"

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ jobs:
363363
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
364364
365365
mkdir -p aar-out
366-
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_library.sh
366+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh
367367
mkdir -p extension/benchmark/android/benchmark/app/libs
368368
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
369369
pushd extension/benchmark/android/benchmark

0 commit comments

Comments
 (0)