Skip to content

Commit b8a930d

Browse files
committed
add qnn eval script
1 parent 30a904b commit b8a930d

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Runs the QNN static-llama eval and fails if the reported word_perplexity
# regresses past a fixed threshold.

set -euo pipefail

# Config
PYTHON_EXECUTABLE="${PYTHON_EXECUTABLE:-python3}"
MODEL="qwen2_5-0_5b"
MAX_SEQ=1024
PTQ="16a4w"
THRESHOLD=62.0  # regression guardrail: fail when word_perplexity > this

# Run command and capture log (timestamped so reruns don't clobber each other)
LOG_FILE="eval_${MODEL}_$(date +%Y%m%d_%H%M%S).log"

echo ">>> Running evaluation..."
$PYTHON_EXECUTABLE -m executorch.examples.qualcomm.oss_scripts.llama.eval_llama_qnn \
  --decoder_model "$MODEL" \
  --quant_linear_only \
  --max_seq_length "$MAX_SEQ" \
  --ptq "$PTQ" | tee "$LOG_FILE"

# Extract last word_perplexity.
# FIX: `|| true` is required — with `set -e -o pipefail`, a no-match grep
# (exit 1) would abort the script here, making the error message below
# unreachable.
LAST_PERP=$(grep "INFO:root:wikitext:" "$LOG_FILE" | tail -n 1 | sed -E "s/.*'word_perplexity,none': ([0-9.]+).*/\1/" || true)

if [[ -z "$LAST_PERP" ]]; then
  echo "❌ Could not find word_perplexity in logs!"
  exit 1
fi

echo ">>> Last word_perplexity = $LAST_PERP"

# Compare against threshold.
# FIX: the original ran awk as a bare statement and then checked `$?` —
# under `set -e` the script exits on awk's nonzero status before the check
# ever runs, so the regression message was dead code. Testing the command
# directly in the `if` keeps `set -e` from intervening.
if ! awk -v val="$LAST_PERP" -v thr="$THRESHOLD" 'BEGIN {exit (val > thr)}'; then
  echo "❌ Regression detected: word_perplexity ($LAST_PERP) > threshold ($THRESHOLD)"
  exit 1
fi

echo "✅ Check passed: word_perplexity ($LAST_PERP) <= $THRESHOLD"

.github/workflows/pull.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -583,6 +583,41 @@ jobs:
583583
# Test static llama weight sharing and accuracy
584584
PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
585585
586+
587+
# CI job: run the QNN static-llama eval script and enforce its
# word_perplexity regression threshold.
test-static-llama-qnn-eval-linux:
  name: test-static-llama-qnn-eval-linux
  uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
  permissions:
    id-token: write
    contents: read
  strategy:
    fail-fast: false
  with:
    runner: linux.2xlarge
    docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
    submodules: 'recursive'
    ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
    timeout: 180
    script: |
      # The generic Linux job chooses to use base env, not the one setup by the image
      CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
      conda activate "${CONDA_ENV}"

      BUILD_TOOL="cmake"

      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
      PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh

      # Setup executorch
      PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"

      # Setup install_requirements for llama
      PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh

      # Run static llama eval and check word_perplexity against the
      # regression threshold (comment previously copy-pasted from the
      # weight-sharing job; this job runs the eval script)
      PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh
619+
620+
586621
test-qnn-models-linux:
587622
name: test-qnn-models-linux
588623
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main

0 commit comments

Comments
 (0)