Use nullptr to represent fallback kernels #12285
name: trunk

on:
  push:
    branches:
      - main
      - release/*
    tags:
      - ciflow/trunk/*
  pull_request:
    paths:
      - .ci/docker/ci_commit_pins/pytorch.txt
      - .ci/scripts/**
  workflow_dispatch:
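
# One concurrency group per workflow and PR number (or commit SHA on push); the
# event-name terms keep workflow_dispatch and scheduled runs in separate groups.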
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
  cancel-in-progress: true

jobs:
  test-models-macos:
    name: test-models-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        # Mac runners are expensive, limited, and unreliable.
        # Do some basic testing in the macOS jobs and rely mostly on the
        # test-models-linux-aarch64 job instead.
        model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
        backend: [xnnpack-quantization-delegation]
        include:
          - model: efficient_sam
            backend: portable
          - model: llama
            backend: portable
          - model: llama3_2_vision_encoder
            backend: portable
          - model: mv3
            backend: portable
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
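      # On pull_request events, check out the PR head commit; otherwise the pushed SHA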
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        MODEL_NAME=${{ matrix.model }}
        BUILD_TOOL=cmake
        BACKEND=${{ matrix.backend }}
        bash .ci/scripts/setup-conda.sh
        # Set up macOS dependencies since there is no Docker support on macOS at the moment
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
        # Build and test executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"

  test-models-linux-aarch64:
    name: test-models-linux-aarch64
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
        backend: [portable, xnnpack-quantization-delegation]
        runner: [linux.arm64.2xlarge]
        include:
          - model: lstm
            backend: portable
            runner: linux.arm64.2xlarge
          - model: mul
            backend: portable
            runner: linux.arm64.2xlarge
          - model: softmax
            backend: portable
            runner: linux.arm64.2xlarge
          - model: phi_4_mini
            backend: portable
            runner: linux.arm64.m7g.4xlarge
          - model: qwen2_5
            backend: portable
            runner: linux.arm64.2xlarge
          - model: llama3_2_vision_encoder
            backend: portable
            runner: linux.arm64.2xlarge
      fail-fast: false
    with:
      runner: ${{ matrix.runner }}
      docker-image: executorch-ubuntu-22.04-gcc11-aarch64
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
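        # ".envs | .[-1]" selects the last entry in conda's env list, i.e. the env provided by the image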
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        MODEL_NAME=${{ matrix.model }}
        BUILD_TOOL="cmake"
        BACKEND=${{ matrix.backend }}
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
        # Build and test ExecuTorch
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"

  test-custom-ops-macos:
    name: test-custom-ops-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}
        bash .ci/scripts/setup-conda.sh
        # Set up macOS dependencies since there is no Docker support on macOS at the moment
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
        # Build and test custom ops
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"

  test-selective-build-macos:
    name: test-selective-build-macos
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        include:
          - build-tool: cmake
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        BUILD_TOOL=${{ matrix.build-tool }}
        bash .ci/scripts/setup-conda.sh
        # Set up macOS dependencies since there is no Docker support on macOS at the moment
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
        # Build and test selective build
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"

  test-demo-backend-delegation:
    name: test-demo-backend-delegation
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        include:
          - build-tool: buck2
          - build-tool: cmake
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        BUILD_TOOL=${{ matrix.build-tool }}
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
        # Test demo backend delegation
        PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"

  test-arm-backend:
    name: test-arm-backend
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        include:
          - test_arm_baremetal: test_pytest_ops_ethosu_fvp
          - test_arm_baremetal: test_pytest_models_ethosu_fvp
          - test_arm_baremetal: test_run_ethosu_fvp
          - test_arm_baremetal: test_models_tosa
          - test_arm_baremetal: test_models_ethos-u55
          - test_arm_baremetal: test_models_ethos-u85
      fail-fast: false
    with:
      runner: linux.2xlarge.memory
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        source .ci/scripts/utils.sh
        install_executorch "--use-pt-pinned-commit"
        .ci/scripts/setup-arm-baremetal-tools.sh
        # Increase the number of files a user can monitor to avoid buck failures.
        # Hopefully this is high enough for this setup.
        sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
        ARM_TEST=${{ matrix.test_arm_baremetal }}
        # Run the selected test via test_arm_baremetal.sh
        backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"

  test-arm-cortex-m-size-test:
    name: test-arm-cortex-m-size-test
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-arm-sdk
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        source .ci/scripts/utils.sh
        install_executorch "--use-pt-pinned-commit"
        .ci/scripts/setup-arm-baremetal-tools.sh
        source examples/arm/ethos-u-scratch/setup_path.sh
        # Use the baremetal toolchain
        arm-none-eabi-c++ --version
        toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
        toolchain_cmake=$(realpath ${toolchain_cmake})
        # Build and run the size test
        bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
        elf="cmake-out/test/size_test"
        # Dump basic info
        ls -al ${elf}
        arm-none-eabi-size ${elf}
        # Dump symbols
        python .github/scripts/run_nm.py -e ${elf}
        python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "arm-none-eabi-"
        python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "arm-none-eabi-"
        # Add basic guard - TODO: refine this!
        arm-none-eabi-strip ${elf}
        output=$(ls -la ${elf})
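        # Word-split the ls output into an array; field 5 (index 4) is the size in bytes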
        arr=($output)
        size=${arr[4]}
        threshold="103068" # ~100KiB
        echo "size: $size, threshold: $threshold"
        if [[ "$size" -le "$threshold" ]]; then
          echo "Success $size <= $threshold"
        else
          echo "Fail $size > $threshold"
          exit 1
        fi

  test-coreml-delegate:
    name: test-coreml-delegate
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-latest-xlarge
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake
        bash .ci/scripts/setup-conda.sh
        # Set up macOS dependencies since there is no Docker support on macOS at the moment
        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
        # Build and test coreml delegate
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh

  test-static-llama-ane:
    name: test-static-llama-ane
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        bash .ci/scripts/setup-conda.sh
        eval "$(conda shell.bash hook)"
        # Install requirements
        ${CONDA_RUN} sh install_requirements.sh
        ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh
        ${CONDA_RUN} python install_executorch.py
        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
        # Test ANE llama
        ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh

  test-llama-torchao-lowbit:
    name: test-llama-torchao-lowbit
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      script: |
        set -eux
        bash .ci/scripts/setup-conda.sh
        eval "$(conda shell.bash hook)"
        # Install requirements
        ${CONDA_RUN} python install_executorch.py
        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
        # Run test
        ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh

  test-llama-runner-linux:
    # Test both Linux x86 and Linux aarch64
    name: test-llama-runner-linux
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        dtype: [fp32]
        mode: [portable, xnnpack+custom]
        runner: [linux.2xlarge, linux.arm64.2xlarge]
        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
        include:
          - dtype: bf16
            mode: portable
            runner: linux.2xlarge
            docker-image: executorch-ubuntu-22.04-clang12
          - dtype: bf16
            mode: portable
            runner: linux.arm64.2xlarge
            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
          - dtype: bf16
            mode: custom
            runner: linux.arm64.2xlarge
            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
        # Exclude runner + docker-image combinations that don't make sense:
        # - the ARM64 gcc image on the x86 runner (linux.2xlarge)
        # - the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
        exclude:
          - runner: linux.2xlarge
            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
          - runner: linux.arm64.2xlarge
            docker-image: executorch-ubuntu-22.04-clang12
      fail-fast: false
    with:
      runner: ${{ matrix.runner }}
      docker-image: ${{ matrix.docker-image }}
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        DTYPE=${{ matrix.dtype }}
        BUILD_TOOL="cmake"
        MODE=${{ matrix.mode }}
        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" | |
# Setup executorch | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" | |
# Install requirements for export_llama | |
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh | |
# Test llama2 | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" | |

  test-llama-runner-macos:
    name: test-llama-runner-mac
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      matrix:
        dtype: [fp32]
        mode: [mps, coreml, xnnpack+custom+quantize_kv]
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        DTYPE=${{ matrix.dtype }}
        MODE=${{ matrix.mode }}
        bash .ci/scripts/setup-conda.sh
        # Set up executorch
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake
        if [[ "${MODE}" == "coreml" ]]; then
          # Install coreml delegate
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
          echo "Finished installing coreml."
        fi
        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
        # Test llama2
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"

  # # TODO(jackzhxng): Runner consistently runs out of memory before the test finishes. Try to find a more powerful runner.
  # test-llava-runner-macos:
  #   name: test-llava-runner-macos
  #   uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
  #   strategy:
  #     fail-fast: false
  #   with:
  #     runner: macos-14-xlarge
  #     python-version: '3.11'
  #     submodules: 'recursive'
  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
  #     timeout: 900
  #     script: |
  #       BUILD_TOOL=cmake
  #       bash .ci/scripts/setup-conda.sh
  #       # Set up macOS dependencies since there is no Docker support on macOS at the moment
  #       GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
  #       # Install Llava requirements
  #       ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
  #       ${CONDA_RUN} bash examples/models/llava/install_requirements.sh
  #       # Run python unittest
  #       ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava
  #       # Run e2e (export, tokenizer and runner)
  #       PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh

  test-qnn-model:
    name: test-qnn-model
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        dtype: [fp32]
        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l]
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-qnn-sdk
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"

  test-apple-model:
    name: test-apple-model
    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
    strategy:
      fail-fast: false
    with:
      runner: macos-m1-stable
      python-version: '3.11'
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        BUILD_TOOL=cmake
        bash .ci/scripts/setup-conda.sh
        # Set up macOS dependencies since there is no Docker support on macOS at the moment
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
        echo "Finished installing coreml."
        # Build and test coreml model
        MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l)
        for MODEL_NAME in "${MODELS[@]}"; do
          echo "::group::Exporting coreml model: $MODEL_NAME"
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml"
          echo "::endgroup::"
          echo "::group::Exporting mps model: $MODEL_NAME"
          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
          echo "::endgroup::"
        done

  test-huggingface-transformers:
    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
    if: ${{ !github.event.pull_request.head.repo.fork }}
    name: test-huggingface-transformers
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    secrets: inherit
    strategy:
      matrix:
        hf_model_id: [
          google/gemma-3-1b-it,
          Qwen/Qwen3-0.6B,
          HuggingFaceTB/SmolLM2-135M,
          meta-llama/Llama-3.2-1B,
          allenai/OLMo-1B-hf,
        ]
      fail-fast: false
    with:
      secrets-env: EXECUTORCH_HF_TOKEN
      runner: linux.2xlarge.memory
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
      script: |
        echo "::group::Set up ExecuTorch"
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
        # Build executor_runner with ETDump enabled
        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
          -DCMAKE_INSTALL_PREFIX=cmake-out \
          -DEXECUTORCH_ENABLE_LOGGING=1 \
          -DCMAKE_BUILD_TYPE=Release \
          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
          -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
          -DEXECUTORCH_BUILD_XNNPACK=ON \
          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
          -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
          -Bcmake-out .
        cmake --build cmake-out -j16 --target install --config Release
        echo "::endgroup::"
        echo "::group::Set up Hugging Face"
        pip install -U "huggingface_hub[cli]"
        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
        git clone https://github.com/huggingface/optimum-executorch
        pushd optimum-executorch
        # There is no release yet; for CI stability, always test the same commit on main
        git checkout da80c9e35b3db5c7eea8731b7d660482fb4870a8
        pip install .[tests]
        popd
        if [ "${{ matrix.hf_model_id }}" == "google/gemma-3-1b-it" ]; then
          # Fixes for gemma-3 are not available in the released version
          git clone https://github.com/huggingface/transformers.git
          pushd transformers
          git checkout a57274466f7f72efaa2662d1738cdaf28ae8071f
          pip install -e .
          popd
        fi
        pip list
        echo "::endgroup::"
echo "::group::Export to ExecuTorch" | |
# Pass matrix variable as environment variable | |
export MODEL_ID="${{ matrix.hf_model_id }}" | |
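        # The directory suffix presumably denotes custom SDPA plus 8da4w (8-bit dynamic activations, 4-bit weights) quantization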
        export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_8da4w"
        pushd optimum-executorch
        optimum-cli export executorch \
          --model ${MODEL_ID} \
          --task text-generation \
          --recipe xnnpack \
          --use_custom_sdpa \
          --output_dir ${OUTPUT_DIR} \
          --qlinear
        ls -FlAGhp ${OUTPUT_DIR}
        popd
        echo "::endgroup::"
echo "::group::Inference using python API" | |
pushd optimum-executorch | |
python -c " | |
import os | |
from optimum.executorch import ExecuTorchModelForCausalLM | |
from transformers import AutoTokenizer | |
model_id = os.getenv('MODEL_ID') | |
pte_dir = os.getenv('OUTPUT_DIR') | |
print(f'Loading model {model_id} from {pte_dir}.') | |
model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) | |
generated_text = model.text_generation( | |
tokenizer=AutoTokenizer.from_pretrained(model_id), | |
prompt='Simply put, the theory of relativity states that', | |
max_seq_len=64 | |
) | |
print(generated_text) | |
" | |
popd | |
echo "::endgroup::" | |
echo "::group::Inference using executor_runner with ETDump" | |
        ./cmake-out/executor_runner \
          --model_path ${OUTPUT_DIR}/model.pte \
          --etdump_path ${OUTPUT_DIR}/etdump.etdp
        export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv
        mkdir -p $(dirname "$TSV_PATH")
        python3 -m devtools.inspector.inspector_cli \
          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
          --tsv_path ${TSV_PATH}
        echo "::endgroup::"

  test-llama-runner-qnn-linux:
    name: test-llama-runner-qnn-linux
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    strategy:
      matrix:
        dtype: [fp32]
        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
        mode: [qnn]
      fail-fast: false
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-qnn-sdk
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 900
      script: |
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        BUILD_TOOL="cmake"
        DTYPE=${{ matrix.dtype }}
        MODE=${{ matrix.mode }}
        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
        ./install_requirements.sh --use-pt-pinned-commit
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
        # Set up executorch
        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
        # Install requirements for export_llama
        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
        # Test llama2
        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"

  unittest-release:
    uses: ./.github/workflows/_unittest.yml
    permissions:
      id-token: write
      contents: read
    with:
      build-mode: Release
      build-tool: cmake
      docker-image: executorch-ubuntu-22.04-clang12

  unittest-nxp-neutron:
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    permissions:
      id-token: write
      contents: read
    with:
      runner: linux.2xlarge
      docker-image: executorch-ubuntu-22.04-clang12
      submodules: 'recursive'
      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
      timeout: 90
      script: |
        set -eux
        # The generic Linux job chooses to use the base env, not the one set up by the image
        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
        conda activate "${CONDA_ENV}"
        # Build and install ExecuTorch
        PYTHON_EXECUTABLE=python \
        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
        .ci/scripts/setup-linux.sh --build-tool "cmake"
        # Install test requirements
        pip install -r backends/nxp/requirements-tests.txt
        # Run pytest
        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh