Commit 56d02f7
Update on "[Executorch][custom ops] Change lib loading logic to account for package dir"
Looking only at the location of the source file (custom_ops.py in this case) can, and does, yield the wrong location depending on where custom_ops is imported from. If it is imported from another source file inside the extension folder, e.g. builder.py in extensions/llm/export, then, I think, custom_ops resolves to the copy installed in site-packages (the pip package). But if it is imported from, say, examples/models/llama/source_transformations/quantized_kv_cache.py (as in the next PR), it seems to resolve to the source location; in one of the CI jobs this is /pytorch/executorch. Whichever directory the filepath resolves to is then where the library is searched for, which of course fails when the filepath resolves to the source location. This PR changes the logic to resolve to the package location instead.

Differential Revision: [D66385480](https://our.internmc.facebook.com/intern/diff/D66385480/)

[ghstack-poisoned]
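As a rough sketch of the two resolution strategies described above (the helper names are hypothetical; the commit's actual code may differ):

import importlib.util
import os

def lib_dir_from_source_file() -> str:
    # Old behavior, roughly: anchor the search on this source file's
    # directory. If Python resolved the module from a source checkout
    # (e.g. /pytorch/executorch), the installed lib is not found here.
    return os.path.join(os.path.dirname(os.path.abspath(__file__)), "lib")

def lib_dir_from_package() -> str:
    # New behavior, roughly: anchor the search on the installed package's
    # directory, regardless of which file triggered the import.
    spec = importlib.util.find_spec("executorch")
    assert spec is not None and spec.origin is not None
    return os.path.join(os.path.dirname(spec.origin), "lib")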
2 parents 7c06524 + c5e4241 commit 56d02f7

191 files changed: 9,897 additions and 1,408 deletions

.ci/scripts/gather_test_models.py

Lines changed: 11 additions & 9 deletions

@@ -20,16 +20,16 @@
 CUSTOM_RUNNERS = {
     "linux": {
         # This one runs OOM on smaller runner, the root cause is unclear (T163016365)
-        "w2l": "linux.12xlarge",
-        "ic4": "linux.12xlarge",
-        "resnet50": "linux.12xlarge",
-        "llava": "linux.12xlarge",
-        "llama3_2_vision_encoder": "linux.12xlarge",
-        # "llama3_2_text_decoder": "linux.12xlarge", # TODO: re-enable test when Huy's change is in / model gets smaller.
+        "w2l": "linux.4xlarge.memory",
+        "ic4": "linux.4xlarge.memory",
+        "resnet50": "linux.4xlarge.memory",
+        "llava": "linux.4xlarge.memory",
+        "llama3_2_vision_encoder": "linux.4xlarge.memory",
+        "llama3_2_text_decoder": "linux.4xlarge.memory",
         # This one causes timeout on smaller runner, the root cause is unclear (T161064121)
-        "dl3": "linux.12xlarge",
-        "emformer_join": "linux.12xlarge",
-        "emformer_predict": "linux.12xlarge",
+        "dl3": "linux.4xlarge.memory",
+        "emformer_join": "linux.4xlarge.memory",
+        "emformer_predict": "linux.4xlarge.memory",
     }
 }

@@ -39,10 +39,12 @@
     "linux": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
     "macos": {
         "mobilebert": 90,
         "emformer_predict": 360,
+        "llama3_2_text_decoder": 360,
     },
 }
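For context, a rough sketch of how per-model overrides like these are typically consumed when building the CI matrix; the defaults and the CUSTOM_TIMEOUT name are assumptions for illustration, not necessarily the script's real identifiers:

# Hypothetical consumer of the override tables above.
DEFAULT_RUNNER = "linux.2xlarge"   # assumed default runner
DEFAULT_TIMEOUT_MINUTES = 30       # assumed default timeout

def job_config(model: str, os_name: str) -> dict:
    runner = CUSTOM_RUNNERS.get(os_name, {}).get(model, DEFAULT_RUNNER)
    timeout = CUSTOM_TIMEOUT.get(os_name, {}).get(model, DEFAULT_TIMEOUT_MINUTES)
    return {"model": model, "runner": runner, "timeout": timeout}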

.ci/scripts/setup-macos.sh

Lines changed: 3 additions & 0 deletions

@@ -49,6 +49,9 @@ install_buck() {
 
   rm "${BUCK2}"
   popd
+
+  # Kill all running buck2 daemon for a fresh start
+  buck2 killall || true
 }
 
 function write_sccache_stub() {

.ci/scripts/test_llama.sh

Lines changed: 18 additions & 4 deletions

@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
       MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
       shift 2
       ;;
+    -pt2e_quantize)
+      PT2E_QUANTIZE="$2"
+      shift 2
+      ;;
     -upload)
       UPLOAD_DIR="$2"
       shift 2

@@ -44,6 +48,12 @@ MODE=${MODE:-"xnnpack+custom"}
 # Default UPLOAD_DIR to empty string if not set
 UPLOAD_DIR="${UPLOAD_DIR:-}"
 
+# Default PT2E_QUANTIZE to empty string if not set
+PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
+
+# Default CMake Build Type to release mode
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
+
 if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
   echo "Expecting atleast 4 positional arguments"
   echo "Usage: [...]"

@@ -136,7 +146,7 @@ cmake_install_executorch_libraries() {
   rm -rf cmake-out
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Debug \
+    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \

@@ -150,22 +160,22 @@
     -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out .
-  cmake --build cmake-out -j9 --target install --config Debug
+  cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
 
 cmake_build_llama_runner() {
   echo "Building llama runner"
   dir="examples/models/llama"
   retry cmake \
     -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DCMAKE_BUILD_TYPE=Debug \
+    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DEXECUTORCH_BUILD_KERNELS_CUSTOM="$CUSTOM" \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
     -DEXECUTORCH_BUILD_XNNPACK="$XNNPACK" \
     -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
     -Bcmake-out/${dir} \
     ${dir}
-  cmake --build cmake-out/${dir} -j9 --config Debug
+  cmake --build cmake-out/${dir} -j9 --config "$CMAKE_BUILD_TYPE"
 
 }

@@ -234,6 +244,10 @@
 fi
 if [[ "${QNN}" == "ON" ]]; then
   EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
+  if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
+    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+  fi
 fi
 # Add dynamically linked library location
 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

.ci/scripts/test_llava.sh

Lines changed: 8 additions & 8 deletions

@@ -8,11 +8,11 @@
 set -exu
 # shellcheck source=/dev/null
 
-BUILD_TYPE=${1:-Debug}
 TARGET_OS=${2:-Native}
 BUILD_DIR=${3:-cmake-out}
+CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
 
-echo "Building with BUILD_TYPE: $BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"
+echo "Building with CMAKE_BUILD_TYPE: $CMAKE_BUILD_TYPE, TARGET_OS: $TARGET_OS, BUILD_DIR: $BUILD_DIR"
 
 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
   PYTHON_EXECUTABLE=python3

@@ -32,7 +32,7 @@ if hash nproc &> /dev/null; then NPROC=$(nproc); fi
 
 EXECUTORCH_COMMON_CMAKE_ARGS=" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
   -DEXECUTORCH_ENABLE_LOGGING=ON \
   -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
   -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \

@@ -49,7 +49,7 @@ cmake_install_executorch_libraries() {
     ${EXECUTORCH_COMMON_CMAKE_ARGS} \
     -B${BUILD_DIR} .
 
-  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
 }
 
 cmake_install_executorch_libraries_for_android() {

@@ -59,14 +59,14 @@ cmake_install_executorch_libraries_for_android() {
     ${EXECUTORCH_COMMON_CMAKE_ARGS} \
     -B${BUILD_DIR} .
 
-  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR} -j${NPROC} --target install --config ${CMAKE_BUILD_TYPE}
 }
 
 
 LLAVA_COMMON_CMAKE_ARGS=" \
   -DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" \
   -DCMAKE_INSTALL_PREFIX=${BUILD_DIR} \
-  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+  -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} \
   -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
   -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
   -DEXECUTORCH_BUILD_XNNPACK=ON"

@@ -81,7 +81,7 @@ cmake_build_llava_runner() {
     -B${BUILD_DIR}/${dir} \
     ${dir}
 
-  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
 }
 

@@ -98,7 +98,7 @@ cmake_build_llava_runner_for_android() {
     -B${BUILD_DIR}/${dir} \
     ${dir}
 
-  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${BUILD_TYPE}
+  cmake --build ${BUILD_DIR}/${dir} -j${NPROC} --config ${CMAKE_BUILD_TYPE}
 }
 
 # only export the one without custom op for now since it's

.github/workflows/apple.yml

Lines changed: 6 additions & 0 deletions

@@ -42,6 +42,8 @@ jobs:
 
   build-demo-ios:
     name: build-demo-ios
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

@@ -190,6 +192,8 @@ jobs:
       ) done
 
   upload-frameworks-ios:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     runs-on: ubuntu-22.04
     needs: [build-frameworks-ios, set-version]
     timeout-minutes: 30

@@ -278,6 +282,8 @@ jobs:
 
   build-benchmark-app:
     name: build-benchmark-app
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     secrets: inherit
     with:

.github/workflows/build-wheels-linux.yml

Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'
 
   build:
     needs: generate-matrix

.github/workflows/build-wheels-m1.yml

Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ jobs:
       test-infra-ref: main
       with-cuda: disabled
       with-rocm: disabled
+      python-versions: '["3.10", "3.11", "3.12"]'
 
   build:
     needs: generate-matrix

.github/workflows/ghstack_land.yml

Lines changed: 1 addition & 15 deletions

@@ -3,21 +3,7 @@ on:
   pull_request:
     types: [closed]
     branches:
-      - 'gh/cccclai/[0-9]+/base'
-      - 'gh/dbort/[0-9]+/base'
-      - 'gh/dvorjackz/[0-9]+/base'
-      - 'gh/guangy10/[0-9]+/base'
-      - 'gh/helunwencser/[0-9]+/base'
-      - 'gh/jorgep31415/[0-9]+/base'
-      - 'gh/kimishpatel/[0-9]+/base'
-      - 'gh/kirklandsign/[0-9]+/base'
-      - 'gh/larryliu0820/[0-9]+/base'
-      - 'gh/lucylq/[0-9]+/base'
-      - 'gh/manuelcandales/[0-9]+/base'
-      - 'gh/mcr229/[0-9]+/base'
-      - 'gh/swolchok/[0-9]+/base'
-      - 'gh/SS-JIA/[0-9]+/base'
-      - 'gh/trivedivivek/[0-9]+/base'
+      - 'gh/*/[0-9]+/base'
 
 jobs:
   ghstack_merge_to_main:
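The single wildcard pattern covers every ghstack base branch of the form gh/<username>/<number>/base, replacing the hand-maintained per-user list. As a rough illustration only (approximating GitHub's branch-filter glob with a Python regex; this is not how Actions evaluates it internally):

import re

# '*' matches a single path segment (the username); '[0-9]+' one or more digits.
BRANCH_FILTER = re.compile(r"^gh/[^/]+/[0-9]+/base$")

assert BRANCH_FILTER.match("gh/kimishpatel/42/base")
assert not BRANCH_FILTER.match("gh/kimishpatel/42/head")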

.github/workflows/pull.yml

Lines changed: 4 additions & 2 deletions

@@ -332,7 +332,7 @@ jobs:
       docker-image: executorch-ubuntu-22.04-clang12
 
   unittest-arm:
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -368,6 +368,7 @@
     strategy:
       matrix:
         dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
         mode: [qnn]
       fail-fast: false
     with:

@@ -384,6 +385,7 @@
       DTYPE=${{ matrix.dtype }}
       BUILD_TOOL="cmake"
       MODE=${{ matrix.mode }}
+      PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
 
       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
       PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

@@ -393,7 +395,7 @@
       # Install requirements for export_llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
       # Test llama2
-      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
 
   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
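The new matrix axis above multiplies out into the job set GitHub Actions runs. As a rough illustration of that cross-product expansion (plain Python; not how Actions computes it internally):

from itertools import product

dtypes = ["fp32"]
pt2e_quantizes = ["qnn_16a16w", "qnn_8a8w"]
modes = ["qnn"]

# One job is spawned per combination: 1 * 2 * 1 = 2 jobs here.
jobs = [
    {"dtype": d, "pt2e_quantize": q, "mode": m}
    for d, q, m in product(dtypes, pt2e_quantizes, modes)
]
assert len(jobs) == 2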

.github/workflows/trunk.yml

Lines changed: 41 additions & 3 deletions

@@ -131,7 +131,7 @@ jobs:
 
   test-arm-backend-delegation:
     name: test-arm-backend-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -157,7 +157,7 @@ jobs:
 
   test-arm-reference-delegation:
     name: test-arm-reference-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
       runner: linux.2xlarge
       docker-image: executorch-ubuntu-22.04-arm-sdk

@@ -290,7 +290,7 @@ jobs:
         # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
 
         # # run e2e (export, tokenizer and runner)
-        # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release
+        # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh
 
   test-qnn-model:
     name: test-qnn-model

@@ -351,6 +351,8 @@ jobs:
         done
 
   test-huggingface-transformers:
+    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+    if: ${{ !github.event.pull_request.head.repo.fork }}
     name: test-huggingface-transformers
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
     secrets: inherit

@@ -441,3 +443,39 @@ jobs:
 
         cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
         echo "::endgroup::"
+
+  test-llama-runner-qnn-linux:
+    name: test-llama-runner-qnn-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+    strategy:
+      matrix:
+        dtype: [fp32]
+        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+        mode: [qnn]
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'true'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 900
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        BUILD_TOOL="cmake"
+        DTYPE=${{ matrix.dtype }}
+        MODE=${{ matrix.mode }}
+        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+        # Setup executorch
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
+        # Install requirements for export_llama
+        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+        # Test llama2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
