Skip to content

Commit 146d0a8

Browse files
committed
Update base for Update on "[ET-VK] Rearranging code in permute op shader to reduce heavy math ops and improve performance."
This diff rearranges the Permute op shader code in the ExecuTorch Vulkan backend to reduce heavy math operations and improve performance. The change also includes adding an extension to support explicit arithmetic types and changing the data type of the position variable to u16vec3. Differential Revision: [D66174765](https://our.internmc.facebook.com/intern/diff/D66174765/) [ghstack-poisoned]
2 parents 7375cf5 + 2967302 commit 146d0a8

File tree

108 files changed

+5007
-927
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+5007
-927
lines changed

.ci/scripts/test_llama.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
2727
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
2828
shift 2
2929
;;
30+
-pt2e_quantize)
31+
PT2E_QUANTIZE="$2"
32+
shift 2
33+
;;
3034
-upload)
3135
UPLOAD_DIR="$2"
3236
shift 2
@@ -44,6 +48,9 @@ MODE=${MODE:-"xnnpack+custom"}
4448
# Default UPLOAD_DIR to empty string if not set
4549
UPLOAD_DIR="${UPLOAD_DIR:-}"
4650

51+
# Default PT2E_QUANTIZE to empty string if not set
52+
PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
53+
4754
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
4855
echo "Expecting atleast 4 positional arguments"
4956
echo "Usage: [...]"
@@ -234,6 +241,10 @@ if [[ "${COREML}" == "ON" ]]; then
234241
fi
235242
if [[ "${QNN}" == "ON" ]]; then
236243
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
244+
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
245+
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
246+
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
247+
fi
237248
fi
238249
# Add dynamically linked library location
239250
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

.github/workflows/build-wheels-linux.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
test-infra-ref: main
2828
with-cuda: disabled
2929
with-rocm: disabled
30+
python-versions: '["3.10", "3.11", "3.12"]'
3031

3132
build:
3233
needs: generate-matrix

.github/workflows/build-wheels-m1.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
test-infra-ref: main
2828
with-cuda: disabled
2929
with-rocm: disabled
30+
python-versions: '["3.10", "3.11", "3.12"]'
3031

3132
build:
3233
needs: generate-matrix

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ jobs:
368368
strategy:
369369
matrix:
370370
dtype: [fp32]
371+
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
371372
mode: [qnn]
372373
fail-fast: false
373374
with:
@@ -384,6 +385,7 @@ jobs:
384385
DTYPE=${{ matrix.dtype }}
385386
BUILD_TOOL="cmake"
386387
MODE=${{ matrix.mode }}
388+
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
387389
388390
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
389391
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
393395
# Install requirements for export_llama
394396
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
395397
# Test llama2
396-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
398+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
397399
398400
test-phi-3-mini-runner-linux:
399401
name: test-phi-3-mini-runner-linux

.github/workflows/trunk.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,39 @@ jobs:
441441
442442
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
443443
echo "::endgroup::"
444+
445+
446+
test-llama-runner-qnn-linux:
447+
name: test-llama-runner-qnn-linux
448+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
449+
strategy:
450+
matrix:
451+
dtype: [fp32]
452+
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
453+
mode: [qnn]
454+
fail-fast: false
455+
with:
456+
runner: linux.2xlarge
457+
docker-image: executorch-ubuntu-22.04-qnn-sdk
458+
submodules: 'true'
459+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
460+
timeout: 900
461+
script: |
462+
# The generic Linux job chooses to use base env, not the one setup by the image
463+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
464+
conda activate "${CONDA_ENV}"
465+
466+
BUILD_TOOL="cmake"
467+
DTYPE=${{ matrix.dtype }}
468+
MODE=${{ matrix.mode }}
469+
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
470+
471+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
472+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
473+
474+
# Setup executorch
475+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
476+
# Install requirements for export_llama
477+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
478+
# Test llama2
479+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
[submodule "third-party/pybind11"]
6565
path = third-party/pybind11
6666
url = https://github.com/pybind/pybind11.git
67+
[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
68+
path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
69+
url = https://github.com/foss-xtensa/nnlib-FusionG3/
6770
[submodule "third-party/ao"]
6871
path = third-party/ao
6972
url = https://github.com/pytorch/ao.git

CONTRIBUTING.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,15 @@ for basics.
283283
- If the reviewers have requests or questions, follow up with them.
284284
- The goal of the reviewer is to ensure that the code in the `main` branch of
285285
the repo is consistent, maintainable, and of high quality.
286-
1. Once approved, your reviewer will import the PR into Meta's internal system
287-
and merge it from there.
288-
- If the PR is approved and not merged within a few business days, please
289-
comment on the PR to ask about its status.
286+
1. Once the PR has been approved,
287+
- If you have the "write permission" in this repo, you can merge it yourself
288+
by clicking the "Squash and merge" button once it is green and all CI
289+
signals are passing.
290+
- If you don't have "write permission" in this repo, the reviewer will take
291+
care of the PR. The reviewer may import the PR into Meta's internal system
292+
to validate it against internal CI.
293+
- If the PR is approved but not merged within 5 business days, please comment
294+
on the PR to ask about its status.
290295
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
291296
will only merge PRs that fix the broken jobs until all critical jobs are
292297
fixed.

backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,25 +26,38 @@ + (nullable NSURL *)compileModelAtURL:(NSURL *)modelURL
2626
#else
2727
__block NSError *localError = nil;
2828
__block NSURL *result = nil;
29-
30-
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
31-
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
32-
result = [tempURL copy];
33-
localError = compilationError;
34-
dispatch_semaphore_signal(sema);
35-
}];
36-
37-
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
38-
if (status != 0) {
29+
30+
if (@available(iOS 16, macOS 13, watchOS 9, tvOS 16, *)) {
31+
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
32+
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
33+
result = [tempURL copy];
34+
localError = compilationError;
35+
dispatch_semaphore_signal(sema);
36+
}];
37+
38+
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
39+
if (status != 0) {
40+
ETCoreMLLogErrorAndSetNSError(error,
41+
ETCoreMLErrorCompilationFailed,
42+
"%@: Failed to compile model in %f seconds.",
43+
NSStringFromClass(ETCoreMLModelCompiler.class),
44+
maxWaitTimeInSeconds);
45+
return nil;
46+
}
47+
} else {
48+
result = [MLModel compileModelAtURL:modelURL error:&localError];
49+
}
50+
51+
if (localError) {
3952
ETCoreMLLogErrorAndSetNSError(error,
40-
ETCoreMLErrorCompilationFailed,
41-
"%@: Failed to compile model in %f seconds.",
42-
NSStringFromClass(ETCoreMLModelCompiler.class),
43-
maxWaitTimeInSeconds);
53+
ETCoreMLErrorCompilationFailed,
54+
"%@: Failed to compile model, error: %@",
55+
NSStringFromClass(ETCoreMLModelCompiler.class),
56+
localError);
4457
return nil;
58+
} else {
59+
return result;
4560
}
46-
47-
return result;
4861
#endif
4962
}
5063

backends/apple/coreml/scripts/install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
2424
mkdir "$COREML_DIR_PATH/third-party"
2525

2626
echo "${green}ExecuTorch: Cloning coremltools."
27-
git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
27+
git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
2828
cd $COREMLTOOLS_DIR_PATH
2929

3030
STATUS=$?

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
7171
)
7272
)
7373

74-
conv_block = ["aten.convolution.default", "executorch_call_delegate"]
75-
safe_softmax_block = [
76-
"getitem",
77-
"getitem",
78-
"getitem",
79-
"getitem",
80-
"aten.any.dim",
81-
"executorch_call_delegate",
82-
]
83-
final_block = ["getitem"]
84-
total = conv_block + 12 * safe_softmax_block + final_block
85-
8674
assert [
8775
node.target.__name__
8876
for node in delegated_program_manager.exported_program().graph.nodes
8977
if node.op == "call_function"
90-
] == total
78+
] == [
79+
"aten.convolution.default",
80+
"executorch_call_delegate",
81+
"getitem",
82+
]
9183

9284
def test_buffer(self):
9385
embedding_dim = 3

0 commit comments

Comments
 (0)