Skip to content

Commit 595d139

Browse files
committed
Update base for Update on "[ET-VK] Replacing use of adaptive_work_group_size function by create_local_wg_size function."
This diff replaces the use of the adaptive_work_group_size function with the create_local_wg_size function, which is better tuned for improving shader performance. Differential Revision: [D66308779](https://our.internmc.facebook.com/intern/diff/D66308779/) [ghstack-poisoned]
2 parents 700a473 + b8fbc48 commit 595d139

File tree

114 files changed

+5063
-1043
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

114 files changed

+5063
-1043
lines changed

.ci/scripts/test_llama.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
2727
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
2828
shift 2
2929
;;
30+
-pt2e_quantize)
31+
PT2E_QUANTIZE="$2"
32+
shift 2
33+
;;
3034
-upload)
3135
UPLOAD_DIR="$2"
3236
shift 2
@@ -44,6 +48,9 @@ MODE=${MODE:-"xnnpack+custom"}
4448
# Default UPLOAD_DIR to empty string if not set
4549
UPLOAD_DIR="${UPLOAD_DIR:-}"
4650

51+
# Default PT2E_QUANTIZE to empty string if not set
52+
PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
53+
4754
if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
4855
echo "Expecting atleast 4 positional arguments"
4956
echo "Usage: [...]"
@@ -234,6 +241,10 @@ if [[ "${COREML}" == "ON" ]]; then
234241
fi
235242
if [[ "${QNN}" == "ON" ]]; then
236243
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
244+
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
245+
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
246+
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
247+
fi
237248
fi
238249
# Add dynamically linked library location
239250
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

.github/workflows/build-wheels-linux.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
test-infra-ref: main
2828
with-cuda: disabled
2929
with-rocm: disabled
30+
python-versions: '["3.10", "3.11", "3.12"]'
3031

3132
build:
3233
needs: generate-matrix

.github/workflows/build-wheels-m1.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ jobs:
2727
test-infra-ref: main
2828
with-cuda: disabled
2929
with-rocm: disabled
30+
python-versions: '["3.10", "3.11", "3.12"]'
3031

3132
build:
3233
needs: generate-matrix

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ jobs:
368368
strategy:
369369
matrix:
370370
dtype: [fp32]
371+
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
371372
mode: [qnn]
372373
fail-fast: false
373374
with:
@@ -384,6 +385,7 @@ jobs:
384385
DTYPE=${{ matrix.dtype }}
385386
BUILD_TOOL="cmake"
386387
MODE=${{ matrix.mode }}
388+
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
387389
388390
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
389391
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
393395
# Install requirements for export_llama
394396
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
395397
# Test llama2
396-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
398+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
397399
398400
test-phi-3-mini-runner-linux:
399401
name: test-phi-3-mini-runner-linux

.github/workflows/trunk.yml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,3 +441,39 @@ jobs:
441441
442442
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
443443
echo "::endgroup::"
444+
445+
446+
test-llama-runner-qnn-linux:
447+
name: test-llama-runner-qnn-linux
448+
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
449+
strategy:
450+
matrix:
451+
dtype: [fp32]
452+
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
453+
mode: [qnn]
454+
fail-fast: false
455+
with:
456+
runner: linux.2xlarge
457+
docker-image: executorch-ubuntu-22.04-qnn-sdk
458+
submodules: 'true'
459+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
460+
timeout: 900
461+
script: |
462+
# The generic Linux job chooses to use base env, not the one set up by the image
463+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
464+
conda activate "${CONDA_ENV}"
465+
466+
BUILD_TOOL="cmake"
467+
DTYPE=${{ matrix.dtype }}
468+
MODE=${{ matrix.mode }}
469+
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
470+
471+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
472+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
473+
474+
# Setup executorch
475+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
476+
# Install requirements for export_llama
477+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
478+
# Test llama2
479+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@
6464
[submodule "third-party/pybind11"]
6565
path = third-party/pybind11
6666
url = https://github.com/pybind/pybind11.git
67+
[submodule "backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3"]
68+
path = backends/cadence/fusion_g3/third-party/nnlib/nnlib-FusionG3
69+
url = https://github.com/foss-xtensa/nnlib-FusionG3/
6770
[submodule "third-party/ao"]
6871
path = third-party/ao
6972
url = https://github.com/pytorch/ao.git

CONTRIBUTING.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -283,10 +283,15 @@ for basics.
283283
- If the reviewers have requests or questions, follow up with them.
284284
- The goal of the reviewer is to ensure that the code in the `main` branch of
285285
the repo is consistent, maintainable, and of high quality.
286-
1. Once approved, your reviewer will import the PR into Meta's internal system
287-
and merge it from there.
288-
- If the PR is approved and not merged within a few business days, please
289-
comment on the PR to ask about its status.
286+
1. Once the PR has been approved,
287+
- If you have "write permission" in this repo, you can merge it yourself
288+
by clicking the "Squash and merge" button once it is green and all CI
289+
signals are passing.
290+
- If you don't have "write permission" in this repo, the reviewer will take
291+
care of the PR. The reviewer may import the PR into Meta's internal system
292+
to validate it against internal CI.
293+
- If the PR is approved but not merged within 5 business days, please comment
294+
on the PR to ask about its status.
290295
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
291296
will only merge PRs that fix the broken jobs until all critical jobs are
292297
fixed.

backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -26,25 +26,38 @@ + (nullable NSURL *)compileModelAtURL:(NSURL *)modelURL
2626
#else
2727
__block NSError *localError = nil;
2828
__block NSURL *result = nil;
29-
30-
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
31-
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
32-
result = [tempURL copy];
33-
localError = compilationError;
34-
dispatch_semaphore_signal(sema);
35-
}];
36-
37-
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
38-
if (status != 0) {
29+
30+
if (@available(iOS 16, macOS 13, watchOS 9, tvOS 16, *)) {
31+
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
32+
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
33+
result = [tempURL copy];
34+
localError = compilationError;
35+
dispatch_semaphore_signal(sema);
36+
}];
37+
38+
long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
39+
if (status != 0) {
40+
ETCoreMLLogErrorAndSetNSError(error,
41+
ETCoreMLErrorCompilationFailed,
42+
"%@: Failed to compile model in %f seconds.",
43+
NSStringFromClass(ETCoreMLModelCompiler.class),
44+
maxWaitTimeInSeconds);
45+
return nil;
46+
}
47+
} else {
48+
result = [MLModel compileModelAtURL:modelURL error:&localError];
49+
}
50+
51+
if (localError) {
3952
ETCoreMLLogErrorAndSetNSError(error,
40-
ETCoreMLErrorCompilationFailed,
41-
"%@: Failed to compile model in %f seconds.",
42-
NSStringFromClass(ETCoreMLModelCompiler.class),
43-
maxWaitTimeInSeconds);
53+
ETCoreMLErrorCompilationFailed,
54+
"%@: Failed to compile model, error: %@",
55+
NSStringFromClass(ETCoreMLModelCompiler.class),
56+
localError);
4457
return nil;
58+
} else {
59+
return result;
4560
}
46-
47-
return result;
4861
#endif
4962
}
5063

backends/apple/coreml/scripts/install_requirements.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
2424
mkdir "$COREML_DIR_PATH/third-party"
2525

2626
echo "${green}ExecuTorch: Cloning coremltools."
27-
git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
27+
git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
2828
cd $COREMLTOOLS_DIR_PATH
2929

3030
STATUS=$?

backends/apple/coreml/test/test_coreml_partitioner.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
7171
)
7272
)
7373

74-
conv_block = ["aten.convolution.default", "executorch_call_delegate"]
75-
safe_softmax_block = [
76-
"getitem",
77-
"getitem",
78-
"getitem",
79-
"getitem",
80-
"aten.any.dim",
81-
"executorch_call_delegate",
82-
]
83-
final_block = ["getitem"]
84-
total = conv_block + 12 * safe_softmax_block + final_block
85-
8674
assert [
8775
node.target.__name__
8876
for node in delegated_program_manager.exported_program().graph.nodes
8977
if node.op == "call_function"
90-
] == total
78+
] == [
79+
"aten.convolution.default",
80+
"executorch_call_delegate",
81+
"getitem",
82+
]
9183

9284
def test_buffer(self):
9385
embedding_dim = 3

0 commit comments

Comments
 (0)