Skip to content

Commit 98f6128

Browse files
cccclai and facebook-github-bot
authored and committed
Add qnn 16a16w quantization test
Differential Revision: D66390212
1 parent a7ed425 commit 98f6128

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

.ci/scripts/test_llama.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
2727
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
2828
shift 2
2929
;;
30+
-pt2e_quantize)
31+
PT2E_QUANTIZE="$2" # PT2E quantization scheme, e.g. qnn_16a16w
32+
shift 2
33+
;;
3034
-upload)
3135
UPLOAD_DIR="$2"
3236
shift 2
@@ -234,6 +238,9 @@ if [[ "${COREML}" == "ON" ]]; then
234238
fi
235239
if [[ "${QNN}" == "ON" ]]; then
236240
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
241+
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
242+
# Append the 16a16w PT2E quantization/calibration flags to the export command.
# The leading space is required: EXPORT_ARGS already ends in
# "--disable_dynamic_shape", so without it the two flags fuse into one token.
# NOTE(review): EXPORT_ARGS is expanded unquoted when export_llama is invoked,
# so the single quotes below stay literal and the phrase is split into four
# words ('Once, upon, a, time') — confirm --calibration_data tolerates that.
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data 'Once upon a time' "
243+
fi
237244
fi
238245
# Add dynamically linked library location
239246
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}

0 commit comments

Comments
 (0)