Add qnn 16a16w quantization test #6256

Workflow file for this run
name: trunk

on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/docker/ci_commit_pins/pytorch.txt
      - .ci/scripts/**
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true
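# NOTE: the concurrency group folds the event name into its key, so a
# workflow_dispatch or scheduled run never cancels (or is cancelled by) a
# push / pull_request run on the same SHA; cancel-in-progress only collapses
# runs within the same group.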
jobs:
  gather-models:
    runs-on: ubuntu-22.04
    outputs:
      models: ${{ steps.gather-models.outputs.models }}
    steps:
      - uses: actions/checkout@v3
        with:
          submodules: 'false'
      - uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Extract the list of models to test
        id: gather-models
        run: |
          set -eux
          PYTHONPATH="${PWD}" python .ci/scripts/gather_test_models.py --target-os macos --event "${GITHUB_EVENT_NAME}"
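  # NOTE: gather_test_models.py is expected to write a JSON build matrix to its
  # `models` step output, roughly of this shape (illustrative only, not the
  # script's exact output):
  #   {"include": [{"model": "mv3", "build-tool": "cmake", "backend": "portable",
  #                 "runner": "macos-m1-stable", "timeout": 90}]}
  # The commented-out jobs below would consume it via
  # ${{ fromJSON(needs.gather-models.outputs.models) }}.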
  # test-models-macos:
  #   name: test-models-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   needs: gather-models
  #   strategy:
  #     matrix: ${{ fromJSON(needs.gather-models.outputs.models) }}
  #     fail-fast: false
  #   with:
  #     runner: ${{ matrix.runner }}
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: ${{ matrix.timeout }}
  #     script: |
  #       MODEL_NAME=${{ matrix.model }}
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       BACKEND=${{ matrix.backend }}
  #       DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test executorch
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}"
  # test-custom-ops-macos:
  #   name: test-custom-ops-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test custom ops
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
  # test-selective-build-macos:
  #   name: test-selective-build-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test selective build
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
  # test-demo-backend-delegation:
  #   name: test-demo-backend-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: buck2
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-clang12
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
  #       # Test demo backend delegation
  #       PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
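  # NOTE: `conda env list --json | jq -r ".envs | .[-1]"` selects the last
  # environment in the list, i.e. the one provisioned by the Docker image,
  # because the reusable linux_job starts in the base env. The same pattern
  # recurs in every Linux job below.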
  # test-arm-backend-delegation:
  #   name: test-arm-backend-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-arm-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       source .ci/scripts/utils.sh
  #       install_executorch
  #       install_arm
  #       # Increase number of files user can monitor to bypass buck failures.
  #       # Hopefully this is high enough for this setup.
  #       sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
  #       # Test ethos-u delegate examples with run.sh
  #       PYTHON_EXECUTABLE=python bash examples/arm/run.sh examples/arm/ethos-u-scratch/
  # test-arm-reference-delegation:
  #   name: test-arm-reference-delegation
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-arm-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       source .ci/scripts/utils.sh
  #       install_executorch
  #       install_arm
  #       # Run arm unit tests
  #       pytest -c /dev/null -v -n auto --cov=./ --cov-report=xml backends/arm/test
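  # NOTE: `-c /dev/null` runs pytest with an empty config file so any
  # repo-level pytest settings are ignored, and `-n auto` distributes the
  # arm unit tests across all available cores via pytest-xdist.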
  # test-coreml-delegate:
  #   name: test-coreml-delegate
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   with:
  #     runner: macos-13-xlarge
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # Build and test coreml delegate
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
  # test-pybind-build-macos:
  #   name: test-pybind-build-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       include:
  #         - build-tool: cmake
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 180
  #     script: |
  #       bash .ci/scripts/setup-conda.sh
  #       # build module for executorch.extension.pybindings.portable_lib
  #       BUILD_TOOL=${{ matrix.build-tool }}
  #       EXECUTORCH_BUILD_PYBIND=ON PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       # see if we can import the module successfully
  #       ${CONDA_RUN} python -c "from executorch.extension.pybindings import portable_lib; print('success!')"
  # test-llama-runner-macos:
  #   name: test-llama-runner-mac
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     matrix:
  #       dtype: [fp32]
  #       mode: [portable, xnnpack+kv+custom, mps, coreml]
  #       include:
  #         - dtype: bf16
  #           mode: portable
  #         - dtype: bf16
  #           mode: custom
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       DTYPE=${{ matrix.dtype }}
  #       MODE=${{ matrix.mode }}
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup executorch
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh cmake
  #       if [[ "${MODE}" == "mps" ]]; then
  #         # Install mps delegate
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
  #         echo "Finishing installing mps."
  #       elif [[ "${MODE}" == "coreml" ]]; then
  #         # Install coreml delegate
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
  #         echo "Finishing installing coreml."
  #       fi
  #       # Install requirements for export_llama
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  #       # Test llama2
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
  # # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner.
  # # test-llava-runner-macos:
  # #   name: test-llava-runner-macos
  # #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  # #   strategy:
  # #     fail-fast: false
  # #   with:
  # #     runner: macos-14-xlarge
  # #     python-version: '3.11'
  # #     submodules: 'true'
  # #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  # #     timeout: 900
  # #     script: |
  # #       BUILD_TOOL=cmake
  # #       bash .ci/scripts/setup-conda.sh
  # #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  # #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  # #       # install Llava requirements
  # #       ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  # #       ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
  # #       # run python unittest
  # #       ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
  # #       # run e2e (export, tokenizer and runner)
  # #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh Release
  # test-qnn-model:
  #   name: test-qnn-model
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   strategy:
  #     matrix:
  #       dtype: [fp32]
  #       model: [dl3, mv3, mv2, ic4, ic3, vit]
  #     fail-fast: false
  #   with:
  #     runner: linux.2xlarge
  #     docker-image: executorch-ubuntu-22.04-qnn-sdk
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
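  # NOTE: this is the job the PR title refers to. In QNN quantizer shorthand,
  # "16a16w" means 16-bit activations with 16-bit weights (as opposed to,
  # e.g., 8a8w); the added quantization test presumably runs through the same
  # test_model.sh entry point with the "qnn" backend argument shown above.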
  # test-apple-model:
  #   name: test-apple-model
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     fail-fast: false
  #   with:
  #     runner: macos-m1-stable
  #     python-version: '3.11'
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Setup MacOS dependencies as there is no Docker support on MacOS atm
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh "${BUILD_TOOL}"
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
  #       echo "Finishing installing coreml."
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/mps/install_requirements.sh
  #       echo "Finishing installing mps."
  #       # Build and test coreml model
  #       MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
  #       for MODEL_NAME in "${MODELS[@]}"; do
  #         echo "::group::Exporting coreml model: $MODEL_NAME"
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
  #         echo "::endgroup::"
  #         echo "::group::Exporting mps model: $MODEL_NAME"
  #         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
  #         echo "::endgroup::"
  #       done
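  # NOTE: the `::group::` / `::endgroup::` echoes are GitHub Actions workflow
  # commands; each model/backend export gets its own collapsible section in
  # the run log.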
  # test-huggingface-transformers:
  #   name: test-huggingface-transformers
  #   uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
  #   secrets: inherit
  #   strategy:
  #     matrix:
  #       hf_model_repo: [google/gemma-2b]
  #     fail-fast: false
  #   with:
  #     secrets-env: EXECUTORCH_HF_TOKEN
  #     runner: linux.12xlarge
  #     docker-image: executorch-ubuntu-22.04-clang12
  #     submodules: 'true'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 90
  #     script: |
  #       echo "::group::Set up ExecuTorch"
  #       # The generic Linux job chooses to use base env, not the one setup by the image
  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
  #       conda activate "${CONDA_ENV}"
  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh cmake
  #       echo "Installing libexecutorch.a, libextension_module.so, libportable_ops_lib.a"
  #       rm -rf cmake-out
  #       cmake \
  #         -DCMAKE_INSTALL_PREFIX=cmake-out \
  #         -DCMAKE_BUILD_TYPE=Release \
  #         -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
  #         -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
  #         -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  #         -DEXECUTORCH_BUILD_XNNPACK=ON \
  #         -DPYTHON_EXECUTABLE=python \
  #         -Bcmake-out .
  #       cmake --build cmake-out -j9 --target install --config Release
  #       echo "Build llama runner"
  #       dir="examples/models/llama"
  #       cmake \
  #         -DCMAKE_INSTALL_PREFIX=cmake-out \
  #         -DCMAKE_BUILD_TYPE=Release \
  #         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
  #         -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
  #         -DEXECUTORCH_BUILD_XNNPACK=ON \
  #         -DPYTHON_EXECUTABLE=python \
  #         -Bcmake-out/${dir} \
  #         ${dir}
  #       cmake --build cmake-out/${dir} -j9 --config Release
  #       echo "::endgroup::"
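  # NOTE: the build is two cmake passes: the first configures and installs
  # the core ExecuTorch libraries into the cmake-out prefix, the second
  # configures examples/models/llama against that prefix and builds the
  # llama_main runner used below.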
  #       echo "::group::Set up HuggingFace Dependencies"
  #       if [ -z "$SECRET_EXECUTORCH_HF_TOKEN" ]; then
  #         echo "::error::SECRET_EXECUTORCH_HF_TOKEN is empty. For security reason secrets won't be accessible on forked PRs. Please make sure you submit a non-forked PR."
  #         exit 1
  #       fi
  #       pip install -U "huggingface_hub[cli]"
  #       huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
  #       pip install accelerate sentencepiece
  #       pip list
  #       echo "::endgroup::"
  #       echo "::group::Export to ExecuTorch"
  #       TOKENIZER_FILE=tokenizer.model
  #       TOKENIZER_BIN_FILE=tokenizer.bin
  #       ET_MODEL_NAME=et_model
  #       # Fetch the file using a Python one-liner
  #       DOWNLOADED_TOKENIZER_FILE_PATH=$(python -c "
  #       from huggingface_hub import hf_hub_download
  #       # Download the file from the Hugging Face Hub
  #       downloaded_path = hf_hub_download(
  #           repo_id='${{ matrix.hf_model_repo }}',
  #           filename='${TOKENIZER_FILE}'
  #       )
  #       print(downloaded_path)
  #       ")
  #       if [ -f "$DOWNLOADED_TOKENIZER_FILE_PATH" ]; then
  #         echo "${TOKENIZER_FILE} downloaded successfully at: $DOWNLOADED_TOKENIZER_FILE_PATH"
  #         python -m extension.llm.tokenizer.tokenizer -t $DOWNLOADED_TOKENIZER_FILE_PATH -o ./${TOKENIZER_BIN_FILE}
  #         ls ./tokenizer.bin
  #       else
  #         echo "Failed to download ${TOKENIZER_FILE} from ${{ matrix.hf_model_repo }}."
  #         exit 1
  #       fi
  #       python -m extension.export_util.export_hf_model -hfm=${{ matrix.hf_model_repo }} -o ${ET_MODEL_NAME}
  #       cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
  #       echo "::endgroup::"