File tree (expand / collapse): 2 files changed, +11 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
27 27 MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
28 28 shift 2
29 29 ;;
30+ -pt2e_quantize)
31+ PT2E_QUANTIZE="$2" # pt2e quantization scheme, e.g. qnn_16a16w
32+ shift 2
33+ ;;
30 34 -upload)
31 35 UPLOAD_DIR="$2"
32 36 shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
234 238 fi
235 239 if [[ "${QNN}" == "ON" ]]; then
236 240 EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
241+ echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
242+ if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
243+ EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data 'Once upon a time' "
244+ fi
237 245 fi
238 246 # Add dynamically linked library location
239 247 $PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
Original file line number | Diff line number | Diff line change
@@ -368,6 +368,7 @@ jobs:
368 368 strategy:
369 369 matrix:
370 370 dtype: [fp32]
371+ pt2e_quantize: [qnn_16a16w]
371 372 mode: [qnn]
372 373 fail-fast: false
373 374 with:
@@ -384,6 +385,7 @@ jobs:
384 385 DTYPE=${{ matrix.dtype }}
385 386 BUILD_TOOL="cmake"
386 387 MODE=${{ matrix.mode }}
388+ PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
387 389
388 390 PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
389 391 PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
@@ -393,7 +395,7 @@ jobs:
393 395 # Install requirements for export_llama
394 396 PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
395 397 # Test llama2
396- PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}"
398+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -pt2e_quantize "${PT2E_QUANTIZE}"
397 399
398 400 test-phi-3-mini-runner-linux:
399 401 name: test-phi-3-mini-runner-linux
You can’t perform that action at this time.
0 commit comments