File tree: 2 files changed, +44 −0 lines changed
lines changed Original file line number Diff line number Diff line change @@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
2727 MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
2828 shift 2
2929 ;;
30+ -pt2e_quantize)
31+ PT2E_QUANTIZE="$2"
32+ shift 2
33+ ;;
3034 -upload)
3135 UPLOAD_DIR="$2"
3236 shift 2
@@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
234238fi
235239if [[ "${QNN}" == "ON" ]]; then
236240 EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
241+ echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
242+ if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
243+ EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
244+ fi
237245fi
238246# Add dynamically linked library location
239247$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
@@ -441,3 +441,39 @@ jobs:
441441
442442 cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
443443 echo "::endgroup::"
444+
445+
446+ test-llama-runner-qnn-linux:
447+ name: test-llama-runner-qnn-linux
448+ uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
449+ strategy:
450+ matrix:
451+ dtype: [fp32]
452+ pt2e_quantize: [qnn_16a16w, qnn_8a8w]
453+ mode: [qnn]
454+ fail-fast: false
455+ with:
456+ runner: linux.2xlarge
457+ docker-image: executorch-ubuntu-22.04-qnn-sdk
458+ submodules: 'true'
459+ ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
460+ timeout: 900
461+ script: |
462+ # The generic Linux job chooses to use base env, not the one setup by the image
463+ CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
464+ conda activate "${CONDA_ENV}"
465+
466+ BUILD_TOOL="cmake"
467+ DTYPE=${{ matrix.dtype }}
468+ MODE=${{ matrix.mode }}
469+ PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
470+
471+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
472+ PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
473+
474+ # Setup executorch
475+ PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
476+ # Install requirements for export_llama
477+ PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
478+ # Test llama2
479+ PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
0 commit comments