Skip to content

Commit 1d4b9b9

Browse files
committed
Update on "[ET-VK] Adding repeat support to add_copy_packed_dim_offset_node function."
This diff adds support for the repeat operation in `add_copy_packed_dim_offset_node` function in the Vulkan backend for Executorch. The function now takes an additional boolean parameter, "repeat", which indicates whether the copy should wrap around the tensor dimension. `copy_packed_dim_offset` shader now has 2 functions `repeat_copy` and `no_repeat_copy` which are chosen based on specialization constant parameter. `no_repeat_copy` function has the legacy copy code. `repeat_copy` function reads input tensor's dim based on output pos and wraps it according to WHCB repetitions. Push constants `src_offset` and `dst_offset` contains source and destination's WHCB dimensions (and not copy offsets) respectively, when calling repeat function. Differential Revision: [D71477552](https://our.internmc.facebook.com/intern/diff/D71477552/) [ghstack-poisoned]
2 parents 6da1074 + 5de50e1 commit 1d4b9b9

File tree

112 files changed

+1568
-611
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

112 files changed

+1568
-611
lines changed

.ci/scripts/build_android_instrumentation.sh

Lines changed: 6 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -12,30 +12,10 @@ if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
1212
fi
1313
which "${PYTHON_EXECUTABLE}"
1414

15-
build_android_test() {
16-
mkdir -p extension/android/executorch_android/src/androidTest/resources
17-
cp extension/module/test/resources/add.pte extension/android/executorch_android/src/androidTest/resources
18-
pushd extension/android
19-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
20-
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
21-
popd
22-
}
15+
mkdir -p "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
16+
cp extension/module/test/resources/add.pte "${BUILD_AAR_DIR}"/executorch_android/src/androidTest/resources
2317

24-
collect_artifacts_to_be_uploaded() {
25-
ARTIFACTS_DIR_NAME="$1"
26-
# Collect Java library test
27-
JAVA_LIBRARY_TEST_DIR="${ARTIFACTS_DIR_NAME}/library_test_dir"
28-
mkdir -p "${JAVA_LIBRARY_TEST_DIR}"
29-
cp extension/android/executorch_android/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}"
30-
}
31-
32-
main() {
33-
build_android_test
34-
if [ -n "$ARTIFACTS_DIR_NAME" ]; then
35-
collect_artifacts_to_be_uploaded ${ARTIFACTS_DIR_NAME}
36-
fi
37-
}
38-
39-
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
40-
main "$@"
41-
fi
18+
pushd "${BUILD_AAR_DIR}"
19+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:testDebugUnitTest
20+
ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:assembleAndroidTest
21+
popd

.ci/scripts/gather_test_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"dl3": "linux.4xlarge.memory",
3434
"emformer_join": "linux.4xlarge.memory",
3535
"emformer_predict": "linux.4xlarge.memory",
36-
"phi-4-mini": "linux.4xlarge.memory",
36+
"phi_4_mini": "linux.4xlarge.memory",
3737
}
3838
}
3939

.ci/scripts/test_llama_torchao_lowbit.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ ${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
7878
-qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
7979
--group_size ${QLINEAR_GROUP_SIZE} \
8080
-E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
81-
--disable_dynamic_shape \
8281
-d fp32
8382

8483
# Test run

.ci/scripts/test_model.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ test_model() {
100100
rm "./${MODEL_NAME}.pte"
101101
return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears.
102102
fi
103-
if [[ "${MODEL_NAME}" == "phi-4-mini" ]]; then
103+
if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then
104104
# Install requirements for export_llama
105105
bash examples/models/llama/install_requirements.sh
106106
# Test export_llama script: python3 -m examples.models.llama.export_llama.
107-
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi-4-mini/config.json
107+
"${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/phi_4_mini/config.json
108108
run_portable_executor_runner
109109
rm "./${MODEL_NAME}.pte"
110110
return

.ci/scripts/unittest-macos-buck2.sh

100644100755
File mode changed.

.ci/scripts/unittest-macos.sh

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ export TMP_DIR=$(mktemp -d)
1919
export PATH="${TMP_DIR}:$PATH"
2020
trap 'rm -rfv ${TMP_DIR}' EXIT
2121

22-
if [[ "$BUILD_TOOL" == "cmake" ]]; then
23-
# Setup MacOS dependencies as there is no Docker support on MacOS atm
24-
PYTHON_EXECUTABLE=python \
25-
EXECUTORCH_BUILD_PYBIND=ON \
26-
CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
27-
${CONDA_RUN} --no-capture-output \
28-
.ci/scripts/setup-macos.sh "$@"
22+
# Setup MacOS dependencies as there is no Docker support on MacOS atm
23+
PYTHON_EXECUTABLE=python \
24+
EXECUTORCH_BUILD_PYBIND=ON \
25+
CMAKE_ARGS="-DEXECUTORCH_BUILD_COREML=ON -DEXECUTORCH_BUILD_MPS=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" \
26+
${CONDA_RUN} --no-capture-output \
27+
.ci/scripts/setup-macos.sh "$@"
2928

29+
if [[ "$BUILD_TOOL" == "cmake" ]]; then
3030
# Install llama3_2_vision dependencies.
3131
PYTHON_EXECUTABLE=python \
3232
${CONDA_RUN} --no-capture-output \

.ci/scripts/utils.sh

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,46 @@ install_pytorch_and_domains() {
6060
# Fetch the target commit
6161
pushd pytorch || return
6262
git checkout "${TORCH_VERSION}"
63-
git submodule update --init --recursive
6463

65-
export USE_DISTRIBUTED=1
66-
# Then build and install PyTorch
67-
python setup.py bdist_wheel
68-
pip install "$(echo dist/*.whl)"
64+
local system_name=$(uname)
65+
if [[ "${system_name}" == "Darwin" ]]; then
66+
local platform=$(python -c 'import sysconfig; import platform; v=platform.mac_ver()[0].split(".")[0]; platform=sysconfig.get_platform().split("-"); platform[1]=f"{v}_0"; print("_".join(platform))')
67+
fi
68+
local python_version=$(python -c 'import platform; v=platform.python_version_tuple(); print(f"{v[0]}{v[1]}")')
69+
local torch_release=$(cat version.txt)
70+
local torch_short_hash=${TORCH_VERSION:0:7}
71+
local torch_wheel_path="cached_artifacts/pytorch/executorch/pytorch_wheels/${system_name}/${python_version}"
72+
local torch_wheel_name="torch-${torch_release}%2Bgit${torch_short_hash}-cp${python_version}-cp${python_version}-${platform:-}.whl"
73+
74+
local cached_torch_wheel="https://gha-artifacts.s3.us-east-1.amazonaws.com/${torch_wheel_path}/${torch_wheel_name}"
75+
# Cache PyTorch wheel is only needed on MacOS, Linux CI already has this as part
76+
# of the Docker image
77+
local torch_wheel_not_found=0
78+
if [[ "${system_name}" == "Darwin" ]]; then
79+
pip install "${cached_torch_wheel}" || torch_wheel_not_found=1
80+
else
81+
torch_wheel_not_found=1
82+
fi
83+
84+
# Found no such wheel, we will build it from source then
85+
if [[ "${torch_wheel_not_found}" == "1" ]]; then
86+
echo "No cached wheel found, continue with building PyTorch at ${TORCH_VERSION}"
87+
88+
git submodule update --init --recursive
89+
USE_DISTRIBUTED=1 python setup.py bdist_wheel
90+
pip install "$(echo dist/*.whl)"
91+
92+
# Only AWS runners have access to S3
93+
if command -v aws && [[ -z "${GITHUB_RUNNER:-}" ]]; then
94+
for wheel_path in dist/*.whl; do
95+
local wheel_name=$(basename "${wheel_path}")
96+
echo "Caching ${wheel_name}"
97+
aws s3 cp "${wheel_path}" "s3://gha-artifacts/${torch_wheel_path}/${wheel_name}"
98+
done
99+
fi
100+
else
101+
echo "Use cached wheel at ${cached_torch_wheel}"
102+
fi
69103

70104
# Grab the pinned audio and vision commits from PyTorch
71105
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)

.github/workflows/_android.yml

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,21 @@ jobs:
2727
conda activate "${CONDA_ENV}"
2828
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
2929
export ARTIFACTS_DIR_NAME=artifacts-to-be-uploaded
30-
31-
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
32-
bash examples/models/llama/install_requirements.sh
33-
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
30+
mkdir -p ${ARTIFACTS_DIR_NAME}/
3431
3532
# Build LLM Demo for Android
3633
export BUILD_AAR_DIR=aar-out
3734
mkdir -p $BUILD_AAR_DIR
38-
bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME}
39-
bash .ci/scripts/build_android_instrumentation.sh ${ARTIFACTS_DIR_NAME}
35+
bash scripts/build_android_library.sh
36+
cp ${BUILD_AAR_DIR}/executorch.aar $ARTIFACTS_DIR_NAME
37+
38+
mkdir -p ${ARTIFACTS_DIR_NAME}/library_test_dir
39+
bash .ci/scripts/build_android_instrumentation.sh
40+
cp ${BUILD_AAR_DIR}/executorch_android/build/outputs/apk/androidTest/debug/executorch_android-debug-androidTest.apk "${ARTIFACTS_DIR_NAME}/library_test_dir"
41+
42+
mkdir -p ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
43+
bash examples/models/llama/install_requirements.sh
44+
bash ".ci/scripts/test_llama.sh" -model stories110M -build_tool cmake -dtype fp16 -mode portable -upload ${ARTIFACTS_DIR_NAME}/fp32-xnnpack-custom
4045
4146
mkdir -p examples/demo-apps/android/LlamaDemo/app/libs
4247
cp aar-out/executorch.aar examples/demo-apps/android/LlamaDemo/app/libs
@@ -96,7 +101,7 @@ jobs:
96101
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug.apk
97102
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/llm_demo/app-debug-androidTest.apk
98103
curl -O https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/fp32-xnnpack-custom/model.zip
99-
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch-debug-androidTest.apk
104+
curl -o android-test-debug-androidTest.apk https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/library_test_dir/executorch_android-debug-androidTest.apk
100105
unzip model.zip
101106
mv *.pte model.pte
102107
@@ -120,7 +125,7 @@ jobs:
120125
with:
121126
api-level: ${{ env.API_LEVEL }}
122127
arch: x86_64
123-
script: ./build/run_android_emulator.sh
128+
script: ./scripts/run_android_emulator.sh
124129
# NB: This is to boot the emulator faster following the instructions on
125130
# https://github.com/ReactiveCircus/android-emulator-runner. The max number
126131
# of cores we can set is 6, any higher number will be reduced to 6.

.github/workflows/_unittest.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,6 @@ jobs:
4949
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
5050
script: |
5151
set -eux
52+
# This is needed to get the prebuilt PyTorch wheel from S3
53+
${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
5254
.ci/scripts/unittest-macos.sh --build-tool "${{ inputs.build-tool }}" --build-mode "${{ inputs.build-mode }}" --editable "${{ inputs.editable }}"

.github/workflows/android-perf.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ jobs:
363363
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
364364
365365
mkdir -p aar-out
366-
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_library.sh
366+
PYTHON_EXECUTABLE=python ANDROID_ABIS="arm64-v8a" BUILD_AAR_DIR=aar-out EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash scripts/build_android_library.sh
367367
mkdir -p extension/benchmark/android/benchmark/app/libs
368368
cp aar-out/executorch.aar extension/benchmark/android/benchmark/app/libs
369369
pushd extension/benchmark/android/benchmark

0 commit comments

Comments
 (0)