Skip to content

Commit 2c11d56

Browse files
authored
Qualcomm AI Engine Direct - HF LLM CI (#14433)
### Summary Replaces `.ci/scripts/test_qnn_static_llama.sh` with `.ci/scripts/test_qnn_static_llm.sh`, which takes a task name (`stories_110m`, `stories_260k_bc`, or `smollm2_135m`) and runs only that task. `pull.yml` now runs the `stories_110m` and `stories_260k_bc` tasks as a matrix, and `nightly.yml` gains a `test-static-hf-llm-qnn-linux` job that runs `smollm2_135m`. `test_static_smollm2` no longer passes `--prefill_ar_len 128`, prints the perplexity score, and asserts on inference speed only when not running on x86_64. ### Test plan No manual test commands were recorded in the original message; the new script is exercised by the CI jobs listed above.
1 parent 4c58010 commit 2c11d56

File tree

7 files changed

+213
-114
lines changed

7 files changed

+213
-114
lines changed

.ci/scripts/test_qnn_static_llama.sh

Lines changed: 0 additions & 69 deletions
This file was deleted.

.ci/scripts/test_qnn_static_llm.sh

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Runs a single QNN static LLM CI task.
#
# Usage:
#   test_qnn_static_llm.sh <task>
#   <task> is one of: stories_110m, stories_260k_bc, smollm2_135m
#
# Environment:
#   PYTHON_EXECUTABLE  Python interpreter to use (default: python3).
#
# Exit status: 0 when the selected task passes, 1 when it fails or when the
# task name is missing or unsupported.
#
# NOTE(review): schema/, exir/, build-x86/ and backends/ are referenced with
# relative paths, so this presumably has to be run from the repository root;
# confirm against the calling workflows.

set -euxo pipefail

source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

# Default to an empty string so that a missing argument reaches the message
# below instead of tripping `set -u` with an "unbound variable" error.
TASK_NAME="${1:-}"
if [[ -z "${TASK_NAME}" ]]; then
  echo "Missing task name, exiting..."
  exit 1
fi

# Download QNN_SDK. If already downloaded, export environment path
source "$(dirname "${BASH_SOURCE[0]}")/../../backends/qualcomm/scripts/install_qnn_sdk.sh"
install_qnn

# This script lives in .ci/scripts/, so the repository root is two levels up
# (the same depth the install_qnn_sdk.sh path above relies on). Assign first
# and export afterwards so that a failing `cd` is not masked by `export`.
EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/../.." && pwd)"
export EXECUTORCH_ROOT
# QNN_SDK_ROOT is not set in this file; presumably exported by install_qnn.
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python

PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"

# Log which interpreter is used; aborts (via `set -e`) if it cannot be found.
command -v "${PYTHON_EXECUTABLE}"

# Although static llama CI does not require graphviz, it is required by test_qnn_delegate.py
# Install through the selected interpreter so the package lands in the same
# environment that runs the tests.
"${PYTHON_EXECUTABLE}" -m pip install graphviz

#######################################
# Runs one test from test_qnn_delegate.py for the SM8650 model.
# Globals:   PYTHON_EXECUTABLE (read)
# Arguments: $1 - test selector passed to -k
#            remaining - extra flags forwarded to the test script unchanged
# Returns:   exit status of the test script
#######################################
run_delegate_test() {
  local test_name=$1
  shift
  "${PYTHON_EXECUTABLE}" backends/qualcomm/tests/test_qnn_delegate.py \
    -k "${test_name}" --model SM8650 "$@"
}

echo "Executing task: $TASK_NAME"
case "${TASK_NAME}" in
  stories_110m)
    # Download stories llama110m artifacts. `set -e` stays active here, so a
    # failed download or tokenizer conversion aborts immediately instead of
    # surfacing later as a confusing test failure.
    download_stories_model_artifacts
    echo "Creating tokenizer.bin"
    "${PYTHON_EXECUTABLE}" -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin

    # Both tests always run; their statuses are captured with `|| rc=$?` so
    # that a failure of the first does not prevent the second from running.

    # Compile only as weight sharing is not applicable on x86.
    compile_rc=0
    run_delegate_test TestExampleLLMScript.test_llama_stories_110m \
      --build_folder build-android/ --executorch_root . \
      --artifact_dir ./stories_110m_pte_size --llama_artifacts . \
      --compile_only || compile_rc=$?

    # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
    accuracy_rc=0
    run_delegate_test TestExampleLLMScript.test_llama_stories_110m \
      --build_folder build-x86/ --executorch_root . \
      --artifact_dir ./stories_110m_accuracy --llama_artifacts . \
      --enable_x86_64 || accuracy_rc=$?

    # Report every failure before deciding the overall result.
    if (( compile_rc != 0 )); then
      echo "Static Llama compile only with weight sharing test failed. $compile_rc."
    fi
    if (( accuracy_rc != 0 )); then
      echo "Static Llama accuracy test failed. $accuracy_rc."
    fi
    if (( compile_rc != 0 || accuracy_rc != 0 )); then
      exit 1
    fi
    ;;

  stories_260k_bc)
    # Check BC. Any failure is normalized to exit status 1.
    bash backends/qualcomm/bc/test_qnn_static_llama_bc.sh || exit 1
    ;;

  smollm2_135m)
    run_delegate_test TestExampleLLMScript.test_static_smollm2 \
      --build_folder build-x86/ --executorch_root . \
      --artifact_dir ./static_smollm2 --enable_x86_64 || exit 1
    ;;

  *)
    echo "Unsupported task: $TASK_NAME"
    exit 1
    ;;
esac

exit 0

.github/workflows/nightly.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,3 +36,37 @@ jobs:
3636
uses: ./.github/workflows/_link_check.yml
3737
with:
3838
ref: ${{ github.sha }}
39+
40+
test-static-hf-llm-qnn-linux:
41+
name: test-static-hf-llm-qnn-linux
42+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
43+
permissions:
44+
id-token: write
45+
contents: read
46+
strategy:
47+
matrix:
48+
task: [smollm2_135m]
49+
fail-fast: false
50+
with:
51+
runner: linux.24xlarge
52+
docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
53+
submodules: 'recursive'
54+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
55+
timeout: 900
56+
script: |
57+
# The generic Linux job chooses to use base env, not the one setup by the image
58+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
59+
conda activate "${CONDA_ENV}"
60+
61+
BUILD_TOOL="cmake"
62+
63+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
64+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
65+
66+
# Setup executorch
67+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
68+
69+
# Setup install_requirements for llama
70+
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
71+
72+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llm.sh ${{ matrix.task }}

.github/workflows/pull.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,8 @@ jobs:
566566
id-token: write
567567
contents: read
568568
strategy:
569+
matrix:
570+
task: [stories_110m, stories_260k_bc]
569571
fail-fast: false
570572
with:
571573
runner: linux.2xlarge
@@ -589,8 +591,7 @@ jobs:
589591
# Setup install_requirements for llama
590592
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
591593
592-
# Test static llama weight sharing and accuracy
593-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
594+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llm.sh ${{ matrix.task }}
594595
595596
test-qnn-models-linux:
596597
name: test-qnn-models-linux

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6304,8 +6304,6 @@ def test_static_smollm2(self):
63046304
"kv",
63056305
"--temperature",
63066306
"0",
6307-
"--prefill_ar_len",
6308-
"128",
63096307
"--max_seq_len",
63106308
"1024",
63116309
"--eval_perplexity",
@@ -6333,8 +6331,10 @@ def test_static_smollm2(self):
63336331
if "Error" in msg:
63346332
self.fail(msg["Error"])
63356333
else:
6334+
print("Perplexity score: ", msg["wiki_ppl"])
63366335
self.assertLessEqual(msg["wiki_ppl"], 25)
6337-
self.assertGreaterEqual(msg["inference_speed"], 200)
6336+
if not self.enable_x86_64:
6337+
self.assertGreaterEqual(msg["inference_speed"], 200)
63386338

63396339
def test_static_smollm3(self):
63406340
if not self.required_envs():

0 commit comments

Comments
 (0)