
Commit 284e95b

Create a script for qnn static llama
1 parent 574838e commit 284e95b

File tree

2 files changed: +63 −45 lines
.ci/scripts/test_qnn_static_llama.sh (new file)

Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

set -exu

# MODEL_NAME and PYTHON_EXECUTABLE are expected to be set by the caller.
# utils.sh provides download_stories_model_artifacts.
source .ci/scripts/utils.sh

# Download and create artifacts.
PARAMS="params.json"
CHECKPOINT_FILE_NAME=""
touch "${PARAMS}"
if [[ "${MODEL_NAME}" == "llama" ]] || [[ "${MODEL_NAME}" == "stories"* ]] || [[ "${MODEL_NAME}" == "tinyllama" ]]; then
  CHECKPOINT_FILE_NAME="stories110M.pt"
  download_stories_model_artifacts
else
  echo "Unsupported model name ${MODEL_NAME}"
  exit 1
fi

# Create tokenizer.bin.
echo "Creating tokenizer.bin"
$PYTHON_EXECUTABLE -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin

# Point the environment at the repo root, the QNN SDK, and the prebuilt
# x86 Python adaptors.
export EXECUTORCH_ROOT="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." && pwd)"
export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
export LD_LIBRARY_PATH="${QNN_SDK_ROOT}/lib/x86_64-linux-clang"
export PYTHONPATH=".."
cp schema/program.fbs exir/_serialize/program.fbs
cp schema/scalar_type.fbs exir/_serialize/scalar_type.fbs
cp -f build-x86/backends/qualcomm/PyQnnManagerAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
cp -f build-x86/backends/qualcomm/PyQnnWrapperAdaptor.cpython-310-x86_64-linux-gnu.so backends/qualcomm/python
# Run both tests to completion and report both results: disable errexit so a
# failing test does not abort the script before its exit code is captured.
set +e

# Compile only, since weight sharing is not supported on x86.
$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
exit_code1=$?

# Check accuracy with weight sharing disabled, since x86 does not support weight sharing.
$PYTHON_EXECUTABLE backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86_64/ --executorch_root . --artifact_dir . --enable_x86_64
exit_code2=$?

set -e

# Check the exit codes and print messages.
if [ $exit_code1 -ne 0 ]; then
  echo "Static Llama compile-only (weight sharing) test failed with exit code $exit_code1."
fi

if [ $exit_code2 -ne 0 ]; then
  echo "Static Llama accuracy test failed with exit code $exit_code2."
fi

# Return failure if either test failed.
if [ $exit_code1 -ne 0 ] || [ $exit_code2 -ne 0 ]; then
  exit 1
else
  exit 0
fi
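
The script reads MODEL_NAME and PYTHON_EXECUTABLE from the environment rather than defining them itself. A minimal sketch of a local invocation, assuming the script is run from the executorch repository root and that stories110m is the desired model (the value is illustrative):

# Hypothetical local run; MODEL_NAME must match one of the names the script accepts.
MODEL_NAME=stories110m PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh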

.github/workflows/pull.yml

Lines changed: 4 additions & 45 deletions
@@ -440,9 +440,8 @@ jobs:
       # Test llama2
       PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

-  # Compile only as weight sharing is not applicable on x86
-  test-static-llama-size-qnn-linux:
-    name: test-static-llama-size-qnn-linux
+  test-static-llama-qnn-linux:
+    name: test-static-llama-qnn-linux
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     permissions:
       id-token: write
@@ -470,49 +469,9 @@

       # Setup install_requirements for llama
       PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-
-      # Retrieve 110M Stories Llama Artifacts
-      PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
-      PYTHON_EXECUTABLE=python download_stories_model_artifacts
-      PYTHONPATH="${PWD}" python -m extension.llm.tokenizer.tokenizer -t tokenizer.model -o tokenizer.bin
-
-      # Test static llama stories110m pte size
-      PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-android/ --executorch_root . --artifact_dir . --compile_only
-
-  # Checks accuracy with weight sharing disabled since x86 does not support weight sharing.
-  test-static-llama-accuracy-qnn-linux:
-    name: test-static-llama-accuracy-qnn-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'true'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-    script: |
-      # The generic Linux job chooses to use base env, not the one setup by the image
-      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-      conda activate "${CONDA_ENV}"
-
-      BUILD_TOOL="cmake"
-
-      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-      PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-
-      # Setup executorch
-      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
-
-      # Setup install_requirements for llama
-      PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-
-      # Retrieve 110M Stories Llama Artifacts
-      PYTHON_EXECUTABLE=python . .ci/scripts/utils.sh
-      PYTHON_EXECUTABLE=python download_stories_model_artifacts

-      # Test static llama stories110m accuracy
-      PYTHONPATH="${PWD}" python backends/qualcomm/tests/test_qnn_delegate.py -k TestExampleScript.test_stories_single_llama --model SM8650 --build_folder build-x86_64/ --executorch_root . --artifact_dir . --enable_x86_64
+      # Test static llama weight sharing and accuracy
+      PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh

   test-qnn-models-linux:
     name: test-qnn-models-linux