Skip to content

Commit c7b74bf

Browse files
cccclai and facebook-github-bot
authored and committed
Add qnn 16a16w quantization test (#7039)
Summary: Pull Request resolved: #7039 Differential Revision: D66390212
1 parent a7ed425 commit c7b74bf

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

.ci/scripts/test_llama.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
2727
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
2828
shift 2
2929
;;
30+
-pt2e_quantize)
31+
PT2E_QUANTIZE="$2" # pt2e quantization scheme, e.g. qnn_16a16w
32+
shift 2
33+
;;
3034
-upload)
3135
UPLOAD_DIR="$2"
3236
shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
234238
fi
235239
if [[ "${QNN}" == "ON" ]]; then
236240
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
241+
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
242+
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
243+
EXPORT_ARGS+="--tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data 'Once upon a time' "
244+
fi
237245
fi
238246
# Add dynamically linked library location
239247
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

.github/workflows/pull.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ jobs:
368368
strategy:
369369
matrix:
370370
dtype: [fp32]
371+
pt2e_quantize: [qnn_16a16w]
371372
mode: [qnn]
372373
fail-fast: false
373374
with:
@@ -384,6 +385,7 @@ jobs:
384385
DTYPE=${{ matrix.dtype }}
385386
BUILD_TOOL="cmake"
386387
MODE=${{ matrix.mode }}
388+
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
387389
388390
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
389391
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
393395
# Install requirements for export_llama
394396
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
395397
# Test llama2
396-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
398+
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -pt2e_quantize "${PT2E_QUANTIZE}"
397399
398400
test-phi-3-mini-runner-linux:
399401
name: test-phi-3-mini-runner-linux

0 commit comments

Comments
 (0)