Add qnn 16a16w quantization test #6256

Workflow file for this run
name: trunk

on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/docker/ci_commit_pins/pytorch.txt
      - .ci/scripts/**
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
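# NOTE: the concurrency group folds the event name into its key, so a
# workflow_dispatch or scheduled run never cancels (or is cancelled by) a
# push / pull_request run on the same SHA; cancel-in-progress only collapses
# runs within the same group.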
jobs:
  gather-models:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Extract the list of models to test
        id: gather-models
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"
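  # NOTE: gather_test_models.py is expected to write a JSON build matrix to its
  # `models` step output, roughly of this shape (illustrative only, not the
  # script's exact output):
  #   {"include": [{"model": "mv3", "build-tool": "cmake", "backend": "portable",
  #                 "runner": "macos-m1-stable", "timeout": 90}]}
  # The commented-out jobs below would consume it via
  # ${{ fromJSON(needs.gather-models.outputs.models) }}.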
  # test-models-macos:
  #   name: test-models-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   needs: gather-models
  #   strategy:
  #     matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
  #     fail-fast: false
  #   with:
  #     runner: ${{ matrix.runner }}
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: ${{ matrix.timeout }}
  #     script: |
  #       MODEL_NAME=${{ matrix.model }}
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       BACKEND=${{ matrix.backend }}
  #       DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test executorch
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
  # test-custom-ops-macos:
  #   name: test-custom-ops-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test custom ops
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
  # test-selective-build-macos:
  #   name: test-selective-build-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test selective build
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
  # test-demo-backend-delegation:
  #   name: test-demo-backend-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: buck2
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-clang12
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
  #       # Test demo backend delegation
  #       PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
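  # NOTE: `conda env list --json | jq -r ".envs | .[-1]"` selects the last
  # environment in the list, i.e. the one provisioned by the Docker image,
  # because the reusable linux_job starts in the base env. The same pattern
  # recurs in every Linux job below.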
  # test-arm-backend-delegation:
  #   name: test-arm-backend-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-arm-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       source .ci/scripts/utils.sh
  #       install_executorch
  #       install_arm
  #       # Increase number of files user can monitor to bypass buck failures.
  #       # Hopefully this is high enough for this setup.
  #       sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
  #       # Test ethos-u delegate examples with run.sh
  #       PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/
  # test-arm-reference-delegation:
  #   name: test-arm-reference-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-arm-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       source .ci/scripts/utils.sh
  #       install_executorch
  #       install_arm
  #       # Run arm unit tests
  #       pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
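  # NOTE: `-c /dev/null` runs pytest with an empty config file so any
  # repo-level pytest settings are ignored, and `-n auto` distributes the
  # arm unit tests across all available cores via pytest-xdist.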
  # test-coreml-delegate:
  #   name: test-coreml-delegate
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   with:
  #     runner: macos-13-xlarge
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test coreml delegate
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
  # test-pybind-build-macos:
  #   name: test-pybind-build-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 180
  #     script: |
  #       bash .ci/scripts/setup-conda.sh
  #       # build module for executorch.extension.pybindings.portable_lib
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # see if we can import the module successfully
  #       ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
  # test-llama-runner-macos:
  #   name: test-llama-runner-mac
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       dtype: [fp32]
  #       mode: [portable, xnnpack+kv+custom, mps, coreml]
  #       include:
  #         - dtype: bf16
  #           mode: portable
  #         - dtype: bf16
  #           mode: custom
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       DTYPE=${{ matrix.dtype }}
  #       MODE=${{ matrix.mode }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup executorch
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake
  #       if [[ "${MODE}" == "mps" ]]; then
  #         # Install mps delegate
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
  #         echo "Finishing installing mps."
  #       elif [[ "${MODE}" == "coreml" ]]; then
  #         # Install coreml delegate
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
  #         echo "Finishing installing coreml."
  #       fi
  #       # Install requirements for export_llama
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  #       # Test llama2
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
  # # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
  # # test-llava-runner-macos:
  # #   name: test-llava-runner-macos
  # #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  # #   strategy:
  # #     fail-fast: false
  # #   with:
  # #     runner: macos-14-xlarge
  # #     python-version: '3.11'
  # #     submodules: 'true'
  # #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  # #     timeout: 900
  # #     script: |
  # #       BUILD_TOOL=cmake
  # #       bash .ci/scripts/setup-conda.sh
  # #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  # #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  # #       # install Llava requirements
  # #       ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  # #       ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
  # #       # run python unittest
  # #       ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
  # #       # run e2e (export, tokenizer and runner)
  # #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release
  # test-qnn-model:
  #   name: test-qnn-model
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   strategy:
  #     matrix:
  #       dtype: [fp32]
  #       model: [dl3, mv3, mv2, ic4, ic3, vit]
  #     fail-fast: false
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-qnn-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
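  # NOTE: this is the job the PR title refers to. In QNN quantizer shorthand,
  # "16a16w" means 16-bit activations with 16-bit weights (as opposed to,
  # e.g., 8a8w); the added quantization test presumably runs through the same
  # test_model.sh entry point with the "qnn" backend argument shown above.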
  # test-apple-model:
  #   name: test-apple-model
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
  #       echo "Finishing installing coreml."
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
  #       echo "Finishing installing mps."
  #       # Build and test coreml model
  #       MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
  #       for MODEL_NAME in "${MODELS[@]}"; do
  #         echo "::group::Exporting coreml model: $MODEL_NAME"
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
  #         echo "::endgroup::"
  #         echo "::group::Exporting mps model: $MODEL_NAME"
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
  #         echo "::endgroup::"
  #       done
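  # NOTE: the `::group::` / `::endgroup::` echoes are GitHub Actions workflow
  # commands; each model/backend export gets its own collapsible section in
  # the run log.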
  # test-huggingface-transformers:
  #   name: test-huggingface-transformers
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   secrets: inherit
  #   strategy:
  #     matrix:
  #       hf_model_repo: [google/gemma-2b]
  #     fail-fast: false
  #   with:
  #     secrets-env: EXECUTORCH_HF_TOKEN
  #     runner: linux.12xlarge
  #     docker-image: executorch-ubuntu-22.04-clang12
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       echo "::group::Set up ExecuTorch"
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
  #       echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
  #       rm -rf cmake-out
  #       cmake \
  #         -DCMAKE_INSTALL_PREFIX=cmake-out \
  #         -DCMAKE_BUILD_TYPE=Release \
  #         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  #         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  #         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  #         -DEXECUTORCH_BUILD_XNNPACK=ON \
  #         -DPYTHON_EXECUTABLE=python \
  #         -Bcmake-out .
  #       cmake --build cmake-out -j9 --target install --config Release
  #       echo "Build llama runner"
  #       dir="examples/models/llama"
  #       cmake \
  #         -DCMAKE_INSTALL_PREFIX=cmake-out \
  #         -DCMAKE_BUILD_TYPE=Release \
  #         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  #         -DEXECUTORCH_BUILD_XNNPACK=ON \
  #         -DPYTHON_EXECUTABLE=python \
  #         -Bcmake-out/${dir} \
  #         ${dir}
  #       cmake --build cmake-out/${dir} -j9 --config Release
  #       echo "::endgroup::"
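  # NOTE: the build is two cmake passes: the first configures and installs
  # the core ExecuTorch libraries into the cmake-out prefix, the second
  # configures examples/models/llama against that prefix and builds the
  # llama_main runner used below.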
  #       echo "::group::Set up HuggingFace Dependencies"
  #       if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
  #         echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
  #         exit 1
  #       fi
  #       pip install -U "huggingface_hub[cli]"
  #       huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
  #       pip install accelerate sentencepiece
  #       pip list
  #       echo "::endgroup::"
  #       echo "::group::Export to ExecuTorch"
  #       TOKENIZER_FILE=tokenizer.model
  #       TOKENIZER_BIN_FILE=tokenizer.bin
  #       ET_MODEL_NAME=et_model
  #       # Fetch the file using a Python one-liner
  #       DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
  #       from huggingface_hub import hf_hub_download
  #       # Download the file from the Hugging Face Hub
  #       downloaded_path = hf_hub_download(
  #           repo_id='${{ matrix.hf_model_repo }}',
  #           filename='${TOKENIZER_FILE}'
  #       )
  #       print(downloaded_path)
  #       ")
  #       if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
  #         echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
  #         python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
  #         ls ./tokenizer.bin
  #       else
  #         echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
  #         exit 1
  #       fi
  #       python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
  #       cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
  #       echo "::endgroup::"