Skip to content

Commit 175fd38

Browse files
committed
Update
[ghstack-poisoned]
2 parents 0ab28f4 + bef9555 commit 175fd38

40 files changed

+1603
-471
lines changed
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Run the QNN static llama evaluation and fail when the last reported
# wikitext word_perplexity exceeds a threshold.
#
# Usage:
#   <this script> [--flags "<extra eval flags>"] [--threshold <number>]
#
#   --flags      Extra arguments appended (word-split) to the eval command.
#                Default: empty.
#   --threshold  Largest accepted word_perplexity. Default: 62.0.
#
# The relative paths used below (schema/, exir/, build-x86/, backends/) are
# resolved against the current working directory.

set -euo pipefail

echo ">>> Script invoked with arguments: $*"

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

# Download QNN_SDK. If already downloaded, export environment path
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
install_qnn

# NOTE(review): this resolves to the parent of the script directory, while the
# install_qnn_sdk.sh path above goes two levels up from the same directory.
# Confirm which directory EXECUTORCH_ROOT is expected to name.
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python

# Fall back to python3 when the caller did not choose an interpreter.
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

which "${PYTHON_EXECUTABLE}"

# -------------------------------
# Parse args
# -------------------------------
EXTRA_FLAGS=""
THRESHOLD=62.0 # default fallback

while [[ $# -gt 0 ]]; do
  case "$1" in
    --flags)
      # Check explicitly so a missing value gives a readable error instead of
      # an "unbound variable" abort from `set -u`.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for option: $1"
        exit 1
      fi
      EXTRA_FLAGS="$2"
      shift 2
      ;;
    --threshold)
      if [[ $# -lt 2 ]]; then
        echo "Missing value for option: $1"
        exit 1
      fi
      THRESHOLD="$2"
      shift 2
      ;;
    *)
      echo "Unknown option: $1"
      exit 1
      ;;
  esac
done

# A non-numeric threshold would make awk fall back to string comparison and
# silently give a meaningless verdict, so reject it up front.
if ! [[ "$THRESHOLD" =~ ^([0-9]+([.][0-9]*)?|[.][0-9]+)$ ]]; then
  echo "Invalid threshold: $THRESHOLD"
  exit 1
fi

# Config
MODEL="qwen2_5-0_5b"
MAX_SEQ=1024
PTQ="16a4w"

# EXTRA_FLAGS must not be reassigned from "$@" here: the loop above has already
# shifted every argument away, so that would always discard --flags.

# Run command and capture *both stdout and stderr*
LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"

echo ">>> Running evaluation with flags: $EXTRA_FLAGS | threshold: $THRESHOLD"
# shellcheck disable=SC2086  # EXTRA_FLAGS is intentionally word-split.
"$PYTHON_EXECUTABLE" -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
  --decoder_model "$MODEL" \
  --quant_linear_only \
  --max_seq_length "$MAX_SEQ" \
  --ptq "$PTQ" \
  $EXTRA_FLAGS 2>&1 | tee "$LOG_FILE"

# Extract last word_perplexity.
# - `|| true` keeps a no-match grep from aborting the script under
#   `set -e -o pipefail`, so the explicit error below is reachable.
# - `sed -n ... /p` prints only when the pattern matches, so a log line
#   without the metric yields an empty result instead of the raw line.
LAST_PERP=$(
  { grep "INFO:root:wikitext:" "$LOG_FILE" || true; } \
    | tail -n 1 \
    | sed -nE "s/.*'word_perplexity,none': ([0-9.]+).*/\1/p"
)

if [[ -z "$LAST_PERP" ]]; then
  echo "❌ Could not find word_perplexity in logs!"
  exit 1
fi

echo ">>> Last word_perplexity = $LAST_PERP"

# Compare against threshold. awk runs inside the `if` condition so that its
# non-zero exit status does not trigger `set -e`; it exits 0 exactly when the
# measured value is above the threshold.
if awk -v val="$LAST_PERP" -v thr="$THRESHOLD" 'BEGIN { exit !((val + 0) > (thr + 0)) }'; then
  echo "❌ Regression detected: word_perplexity ($LAST_PERP) > threshold ($THRESHOLD)"
  exit 1
fi

echo "✅ Check passed: word_perplexity ($LAST_PERP) <= $THRESHOLD"

.ci/scripts/unittest-buck2.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,17 @@ BUILDABLE_KERNELS_PRIM_OPS_TARGETS=$(buck2 query //kernels/prim_ops/... | grep -
3535
for op in "build" "test"; do
3636
buck2 $op $BUILDABLE_OPTIMIZED_OPS \
3737
//examples/selective_build:select_all_dtype_selective_lib_portable_lib \
38+
//extension/llm/custom_ops/spinquant/test:fast_hadamard_transform_test \
39+
//extension/llm/runner/test:test_multimodal_input \
40+
//extension/llm/runner/test:test_generation_config \
3841
//kernels/portable/... \
3942
$BUILDABLE_KERNELS_PRIM_OPS_TARGETS //runtime/backend/... //runtime/core/... \
4043
//runtime/executor: //runtime/kernel/... //runtime/platform/...
4144
done
4245

4346
# Build only without testing
44-
buck2 build //codegen/tools/... # Needs torch for testing which we don't have in our OSS buck setup.
47+
buck2 build //codegen/tools/... \
48+
//extension/llm/runner/io_manager:io_manager \
49+
//extension/llm/modules/... \
50+
//extension/llm/runner:multimodal_runner_lib \
51+
//extension/llm/runner:text_decoder_runner

.github/workflows/add-unanswered-to-project.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ jobs:
1212
- name: Add open issues and open, non-draft PRs to org project (excluding certain authors)
1313
uses: actions/github-script@v7
1414
with:
15-
github-token: ${{ secrets.GITHUB_TOKEN }}
1615
script: |
1716
const projectId = "PVT_kwDOAUB9vs4A_PUL"; // PyTorch org project 136
1817
const owner = 'pytorch';

.github/workflows/android-release-artifacts.yml

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,11 @@ on:
1515
type: choice
1616
options:
1717
- "xnnpack"
18-
- "vulkan+xnnpack"
18+
- "vulkan"
1919
- "qnn"
2020
schedule:
2121
- cron: 0 10 * * *
2222

23-
concurrency:
24-
group: ${{ github.workflow }}-${{ github.ref }}
25-
cancel-in-progress: true
26-
2723
jobs:
2824
check-if-aar-exists:
2925
name: check-if-aar-exists
@@ -34,12 +30,13 @@ jobs:
3430
shell: bash
3531
run: |
3632
VERSION="${{ inputs.version }}"
33+
FLAVOR="${{ inputs.flavor }}"
3734
if [ -z "$VERSION" ]; then
3835
echo "No version name specified. Will create a snapshot AAR"
3936
exit 0
4037
fi
41-
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar" | grep "200 OK"; then
42-
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}/executorch.aar"
38+
if curl -I "https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar" | grep "200 OK"; then
39+
echo "AAR already exists at https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}/executorch.aar"
4340
echo "Will skip build/upload"
4441
exit 1
4542
fi
@@ -93,7 +90,10 @@ jobs:
9390
fi
9491
9592
FLAVOR="${{ inputs.flavor }}"
96-
if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
93+
if [[ "$FLAVOR" == "vulkan" || -z "$FLAVOR" ]]; then
94+
curl -O https://sdk.lunarg.com/sdk/download/1.4.321.1/linux/vulkansdk-linux-x86_64-1.4.321.1.tar.xz
95+
tar xf vulkansdk-linux-x86_64-1.4.321.1.tar.xz -C /tmp
96+
export PATH="/tmp/1.4.321.1/x86_64/bin:$PATH"
9797
export EXECUTORCH_BUILD_VULKAN=ON
9898
fi
9999
@@ -145,8 +145,12 @@ jobs:
145145
pip install awscli==1.32.18
146146
AWS_CMD="aws s3 cp"
147147
VERSION="${{ inputs.version }}"
148+
FLAVOR="${{ inputs.flavor }}"
148149
if [ -z "$VERSION" ]; then
149150
VERSION="snapshot-$(date +"%Y%m%d")"
150151
fi
151-
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}/executorch.aar --acl public-read
152-
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}/executorch.aar.sha256sums --acl public-read
152+
if [ -z "$FLAVOR" ]; then
153+
FLAVOR="xnnpack"
154+
fi
155+
${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar --acl public-read
156+
${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar.sha256sums --acl public-read

.github/workflows/trunk.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -973,6 +973,60 @@ jobs:
973973
# Test llama2
974974
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
975975
976+
# Filter on QNN-related paths so that the QNN jobs are triggered only when the relevant files change
977+
changes:
978+
runs-on: ubuntu-latest
979+
outputs:
980+
qnn: ${{ steps.filter.outputs.qnn }}
981+
steps:
982+
- uses: actions/checkout@v4
983+
- uses: dorny/paths-filter@v3
984+
id: filter
985+
with:
986+
filters: |
987+
qnn:
988+
- 'backends/qualcomm/**'
989+
- 'examples/qualcomm/**'
990+
- 'examples/models/llama/**'
991+
992+
test-static-llama-qnn-eval-linux:
993+
needs: changes # has dependency on changes jobs defined above
994+
if: needs.changes.outputs.qnn == 'true'
995+
name: test-static-llama-qnn-eval-linux
996+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
997+
permissions:
998+
id-token: write
999+
contents: read
1000+
strategy:
1001+
fail-fast: false
1002+
matrix:
1003+
config:
1004+
- name: "baseline"
1005+
flags: ""
1006+
threshold: 62.0
1007+
with:
1008+
runner: linux.2xlarge
1009+
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
1010+
submodules: 'recursive'
1011+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
1012+
timeout: 180
1013+
script: |
1014+
# The generic Linux job chooses to use base env, not the one setup by the image
1015+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1016+
conda activate "${CONDA_ENV}"
1017+
BUILD_TOOL="cmake"
1018+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
1019+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
1020+
# Setup executorch
1021+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
1022+
# Setup install_requirements for llama
1023+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
1024+
1025+
echo ">>> Running config: ${{ matrix.config.name }}"
1026+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
1027+
--flags "${{ matrix.config.flags }}" \
1028+
--threshold "${{ matrix.config.threshold }}"
1029+
9761030
unittest-release:
9771031
uses: ./.github/workflows/_unittest.yml
9781032
permissions:

.lintrunner.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,7 @@ exclude_patterns = [
206206
'**/*.png',
207207
'**/*.webp',
208208
'**/*.jpeg',
209+
'**/*.mp3',
209210
'**/*.mp4',
210211
'**/*.pte',
211212
'**/*.pth',
@@ -216,6 +217,8 @@ exclude_patterns = [
216217
'**/*.jpg',
217218
'**/*.jar',
218219
'**/*.gif',
220+
'extension/llm/tokenizers',
221+
'extension/llm/tokenizers/**',
219222
# File contains @generated
220223
'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',
221224
'extension/llm/custom_ops/spinquant/test/fast_hadamard_transform_special_unstrided_cpu.h',

backends/apple/coreml/runtime/delegate/ETCoreMLModelManager.mm

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -436,11 +436,13 @@ - (nullable NSURL *)compiledModelURLWithIdentifier:(NSString *)identifier
436436
switch (modelAssetType.value()) {
437437
case ModelAssetType::CompiledModel: {
438438
// Model is already compiled.
439+
ETCoreMLLogInfo("The model in the pte file is pre-compiled. Skipping compilation.");
439440
return modelURL;
440441
}
441442

442443
case ModelAssetType::Model: {
443444
// Compile the model.
445+
ETCoreMLLogInfo("The model in the pte file is not pre-compiled. Compiling with a 5 min timeout.");
444446
NSURL *compiledModelURL = [ETCoreMLModelCompiler compileModelAtURL:modelURL
445447
maxWaitTimeInSeconds:(5 * 60)
446448
error:error];

backends/arm/operator_support/to_dim_order_copy_support.py

Lines changed: 38 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
#
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
5+
"""Declare operator support for ``_to_dim_order_copy`` in TOSA.
6+
7+
Provide dtype-compatibility checks for casting when converting to a specific
8+
dimension order. Supported input/output dtype pairs depend on the active TOSA
9+
profile (integer and/or float).
10+
11+
"""
512

613
# pyre-unsafe
714
import copy
@@ -25,6 +32,16 @@
2532

2633
@register_tosa_support_check
2734
class ToCopySupported(SupportedTOSAOperatorCheck):
35+
"""Provide TOSA support check for ``_to_dim_order_copy``.
36+
37+
Attributes:
38+
SUPPORTED_INT_PROFILE_DTYPES (dict[torch.dtype, list[torch.dtype]]):
39+
Allowed output dtypes for each integer input dtype.
40+
SUPPORTED_FP_PROFILE_DTYPES (dict[torch.dtype, list[torch.dtype]]):
41+
Allowed output dtypes for each floating input dtype.
42+
43+
"""
44+
2845
targets = [
2946
exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
3047
]
@@ -40,21 +57,31 @@ def _merge_supported_types(
4057
dtypes1: SupportedTypeDict,
4158
dtypes2: SupportedTypeDict,
4259
) -> SupportedTypeDict:
60+
"""Return a merged mapping of supported dtype transitions.
61+
62+
Args:
63+
dtypes1 (dict[torch.dtype, list[torch.dtype]]): Base mapping.
64+
dtypes2 (dict[torch.dtype, list[torch.dtype]]): Mapping to merge in.
65+
66+
Returns:
67+
dict[torch.dtype, list[torch.dtype]]: Combined mapping.
68+
69+
"""
4370
merged_dtypes = copy.deepcopy(
4471
dtypes1
45-
) # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_TYPES
72+
) # Use deepcopy to avoid unintentionally modifying SUPPORTED_INT_PROFILE_DTYPES
4673
for k, v in dtypes2.items():
4774
merged_dtypes[k] = merged_dtypes.get(k, []) + v
4875
return merged_dtypes
4976

50-
SUPPORTED_INT_TYPES: SupportedTypeDict = {
77+
SUPPORTED_INT_PROFILE_DTYPES: SupportedTypeDict = {
5178
torch.bool: [torch.bool, torch.int8, torch.int16, torch.int32],
5279
torch.int8: [torch.bool, torch.int8, torch.int16, torch.int32],
5380
torch.int16: [torch.bool, torch.int8, torch.int16, torch.int32],
5481
torch.int32: [torch.bool, torch.int8, torch.int16, torch.int32],
5582
torch.int64: [torch.bool, torch.int8, torch.int16, torch.int32],
5683
}
57-
SUPPORTED_FLOAT_TYPES: SupportedTypeDict = {
84+
SUPPORTED_FP_PROFILE_DTYPES: SupportedTypeDict = {
5885
torch.int8: [torch.int8, torch.float16, torch.bfloat16, torch.float32],
5986
torch.int16: [torch.int16, torch.float16, torch.bfloat16, torch.float32],
6087
torch.int32: [torch.int32, torch.float16, torch.bfloat16, torch.float32],
@@ -92,22 +119,25 @@ def _merge_supported_types(
92119
torch.float32,
93120
],
94121
}
95-
ALL_SUPPORTED_TYPES = _merge_supported_types(
96-
SUPPORTED_INT_TYPES, SUPPORTED_FLOAT_TYPES
97-
)
98122

99123
def is_node_tosa_supported(
100124
self, node: fx.Node, tosa_spec: TosaSpecification
101125
) -> bool:
126+
"""Return True if the node is supported by TOSA.
127+
128+
Check FakeTensor metadata, validate input dtype is supported for the
129+
active profile, and ensure the output dtype is allowed for the given
130+
input dtype.
102131
132+
"""
103133
supported_dtypes: SupportedTypeDict = {}
104134
if tosa_spec.support_integer():
105135
supported_dtypes = self._merge_supported_types(
106-
self.SUPPORTED_INT_TYPES, supported_dtypes
136+
self.SUPPORTED_INT_PROFILE_DTYPES, supported_dtypes
107137
)
108138
if tosa_spec.support_float():
109139
supported_dtypes = self._merge_supported_types(
110-
self.SUPPORTED_FLOAT_TYPES, supported_dtypes
140+
self.SUPPORTED_FP_PROFILE_DTYPES, supported_dtypes
111141
)
112142

113143
if len(node.all_input_nodes) != 1:

0 commit comments

Comments
 (0)