diff --git a/.ci/scripts/export_optimum_et_coreml.sh b/.ci/scripts/export_optimum_et_coreml.sh
new file mode 100644
index 00000000000..7634ed65c86
--- /dev/null
+++ b/.ci/scripts/export_optimum_et_coreml.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Export a Hugging Face LLM to an ExecuTorch Core ML .pte via optimum-cli.
+# Usage: export_optimum_et_coreml.sh <model_name> <output_dir>
+MODEL_NAME="$1"
+OUTPUT_DIR="$2"
+MODEL_ID=""
+TASK=""
+RECIPE=""
+
+# Map the short CI model name to its Hugging Face model_id, task, and export recipe
+case "$MODEL_NAME" in
+  smollm)
+    MODEL_ID="HuggingFaceTB/SmolLM2-135M"
+    TASK="text-generation"
+    RECIPE="coreml_llm_4bit"
+    ;;
+  llama3)
+    MODEL_ID="NousResearch/Llama-3.2-1B"
+    TASK="text-generation"
+    RECIPE="coreml_llm_4bit"
+    ;;
+  *)
+    echo "Error: Unknown model name '$MODEL_NAME' (expected: smollm or llama3)" >&2
+    exit 1
+    ;;
+esac
+
+# Call the CLI tool with the resolved model_id
+echo "Exporting model: $MODEL_NAME (ID: $MODEL_ID, TASK: $TASK, RECIPE: $RECIPE)"
+optimum-cli export executorch \
+  --model "${MODEL_ID}" \
+  --task "${TASK}" \
+  --recipe "${RECIPE}" \
+  --output_dir "${OUTPUT_DIR}"
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 4baa4ec3b75..b3326cc644d 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -18,557 +18,590 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-models-macos:
-    name: test-models-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        # Mac runners are expensive and limited, and non reliable.
-        # Do some basic testing for macos jobs, and rely mostly on
-        # test-models-linux-aarch64 job instead.
-        model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
-        backend: [xnnpack-quantization-delegation]
-        include:
-          - model: efficient_sam
-            backend: portable
-          - model: llama
-            backend: portable
-          - model: llama3_2_vision_encoder
-            backend: portable
-          - model: mv3
-            backend: portable
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=cmake
-        BACKEND=${{ matrix.backend }}
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test executorch
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-models-linux-aarch64:
-    name: test-models-linux-aarch64
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
-        backend: [portable, xnnpack-quantization-delegation]
-        runner: [linux.arm64.2xlarge]
-        include:
-          - model: lstm
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: mul
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: softmax
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: phi_4_mini
-            backend: portable
-            runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: llama3_2_vision_encoder
-            backend: portable
-            runner: 
linux.arm64.2xlarge - fail-fast: false - with: - runner: ${{ matrix.runner }} - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - MODEL_NAME=${{ matrix.model }} - BUILD_TOOL="cmake" - BACKEND=${{ matrix.backend }} - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Build and test ExecuTorch - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" - - test-custom-ops-macos: - name: test-custom-ops-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} + # test-models-macos: + # name: test-models-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # # Mac runners are expensive and limited, and non reliable. + # # Do some basic testing for macos jobs, and rely mostly on + # # test-models-linux-aarch64 job instead. + # model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l] + # backend: [xnnpack-quantization-delegation] + # include: + # - model: efficient_sam + # backend: portable + # - model: llama + # backend: portable + # - model: llama3_2_vision_encoder + # backend: portable + # - model: mv3 + # backend: portable + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL=cmake + # BACKEND=${{ matrix.backend }} - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test custom ops - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" + + # test-models-linux-aarch64: + # name: test-models-linux-aarch64 + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe] + # backend: [portable, xnnpack-quantization-delegation] + # runner: [linux.arm64.2xlarge] + # include: + # - model: lstm + # backend: portable + # runner: linux.arm64.2xlarge + # - model: mul + # backend: portable + # runner: linux.arm64.2xlarge + # - model: softmax + # backend: portable + # runner: 
linux.arm64.2xlarge + # - model: phi_4_mini + # backend: portable + # runner: linux.arm64.m7g.4xlarge + # - model: qwen2_5 + # backend: portable + # runner: linux.arm64.2xlarge + # - model: llama3_2_vision_encoder + # backend: portable + # runner: linux.arm64.2xlarge + # fail-fast: false + # with: + # runner: ${{ matrix.runner }} + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - test-selective-build-macos: - name: test-selective-build-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL="cmake" + # BACKEND=${{ matrix.backend }} - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test selective build - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" - - test-demo-backend-delegation: - name: test-demo-backend-delegation - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - include: - - build-tool: buck2 - - build-tool: cmake - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" - - test-arm-backend: - name: test-arm-backend - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - include: - - test_arm_baremetal: test_pytest_ops_ethosu_fvp - - test_arm_baremetal: test_pytest_models_ethosu_fvp - - test_arm_baremetal: test_run_ethosu_fvp - - test_arm_baremetal: test_models_tosa - - test_arm_baremetal: test_models_ethos-u55 - - test_arm_baremetal: test_models_ethos-u85 - fail-fast: false - with: - runner: linux.2xlarge.memory - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool 
"${BUILD_TOOL}" + # # Build and test ExecuTorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" - source .ci/scripts/utils.sh - install_executorch "--use-pt-pinned-commit" + # test-custom-ops-macos: + # name: test-custom-ops-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} - .ci/scripts/setup-arm-baremetal-tools.sh + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test custom ops + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" - # Increase number of files user can monitor to bypass buck failures. - # Hopefully this is high enough for this setup. - sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 + # test-selective-build-macos: + # name: test-selective-build-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} - ARM_TEST=${{ matrix.test_arm_baremetal }} + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test selective build + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" + + # test-demo-backend-delegation: + # name: test-demo-backend-delegation + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # include: + # - build-tool: buck2 + # - build-tool: cmake + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL=${{ matrix.build-tool }} + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Test selective build + # PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" + + # test-arm-backend: + # name: test-arm-backend + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # include: + # - test_arm_baremetal: test_pytest_ops_ethosu_fvp + # - test_arm_baremetal: test_pytest_models_ethosu_fvp + # - test_arm_baremetal: test_run_ethosu_fvp + # - test_arm_baremetal: test_models_tosa + # 
- test_arm_baremetal: test_models_ethos-u55 + # - test_arm_baremetal: test_models_ethos-u85 + # fail-fast: false + # with: + # runner: linux.2xlarge.memory + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - # Test test_arm_baremetal.sh with test - backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}" + # source .ci/scripts/utils.sh + # install_executorch "--use-pt-pinned-commit" - test-arm-cortex-m-size-test: - name: test-arm-cortex-m-size-test - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - os: [bare_metal, zephyr-preset] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0" - setup_script_args="" - if [[ ${{ matrix.os}} == "bare_metal" ]]; then - toolchain_prefix=arm-none-eabi- - threshold="103268" # ~100KiB - toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake - elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then - setup_script_args="--target-toolchain zephyr" - toolchain_prefix=arm-zephyr-eabi- - threshold="133120" # should be ~125KB, set threshold to 130KB - toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake - else - echo "Fail unsupport OS selection ${{ matrix.os }}" - exit 1 - fi - - source .ci/scripts/utils.sh - install_executorch "--use-pt-pinned-commit" - .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args} - source examples/arm/ethos-u-scratch/setup_path.sh - - # User toolchain - ${toolchain_prefix}c++ --version - - # Setup cmake target to desired toolchain - toolchain_cmake=$(realpath ${toolchain_cmake}) - - # Build and run size test - if [[ ${{ matrix.os}} == "bare_metal" ]]; then - bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON" - elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then - CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out . - cmake --build cmake-out -j9 --target install --config Release - CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test - cmake --build cmake-out/test -j9 --config Release - else - echo "Fail unsupport OS selection ${{ matrix.os }}" - exit 1 - fi - - elf="cmake-out/test/size_test" - - # Dump basic info - ls -al ${elf} - ${toolchain_prefix}size ${elf} - - # Dump symbol - python .github/scripts/run_nm.py -e ${elf} - python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}" - python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}" - - # Add basic guard - TODO: refine this! 
- ${toolchain_prefix}strip ${elf} - output=$(ls -la ${elf}) - arr=($output) - size=${arr[4]} - echo "size: $size, threshold: $threshold" - if [[ "$size" -le "$threshold" ]]; then - echo "Success $size <= $threshold" - else - echo "Fail $size > $threshold" - exit 1 - fi - - nxp-build-test: - name: nxp-build-test - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # Build - cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out . - cmake --build cmake-out --target executorch_delegate_neutron --config Release - - # Build check for the neutron backend library - lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a" - if [ -f $lib_neutron ]; then - echo "Neutron backend library built." - else - echo "Neutron backend library not found!" - exit 1 - fi - - test-coreml-delegate: - name: test-coreml-delegate - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-latest-xlarge - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - BUILD_TOOL=cmake + # .ci/scripts/setup-arm-baremetal-tools.sh - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh + # # Increase number of files user can monitor to bypass buck failures. + # # Hopefully this is high enough for this setup. 
+ # sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 - test-static-llama-ane: - name: test-static-llama-ane - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - bash .ci/scripts/setup-conda.sh - eval "$(conda shell.bash hook)" + # ARM_TEST=${{ matrix.test_arm_baremetal }} - # Install requirements - ${CONDA_RUN} sh install_requirements.sh - ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh - ${CONDA_RUN} python install_executorch.py - ${CONDA_RUN} sh examples/models/llama/install_requirements.sh + # # Test test_arm_baremetal.sh with test + # backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}" - # Test ANE llama - ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh + # test-arm-cortex-m-size-test: + # name: test-arm-cortex-m-size-test + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # os: [bare_metal, zephyr-preset] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0" + # setup_script_args="" + # if [[ ${{ matrix.os}} == "bare_metal" ]]; then + # toolchain_prefix=arm-none-eabi- + # threshold="103268" # ~100KiB + # toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake + # elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then + # setup_script_args="--target-toolchain zephyr" + # toolchain_prefix=arm-zephyr-eabi- + # threshold="133120" # should be ~125KB, set threshold to 130KB + # toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake + # else + # echo "Fail unsupport OS selection ${{ matrix.os }}" + # exit 1 + # fi + + # source .ci/scripts/utils.sh + # install_executorch "--use-pt-pinned-commit" + # .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args} + # source examples/arm/ethos-u-scratch/setup_path.sh + + # # User toolchain + # ${toolchain_prefix}c++ --version + + # # Setup cmake target to desired toolchain + # toolchain_cmake=$(realpath ${toolchain_cmake}) + + # # Build and run size test + # if [[ ${{ matrix.os}} == "bare_metal" ]]; then + # bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON" + # elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then + # CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out . 
+ # cmake --build cmake-out -j9 --target install --config Release + # CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test + # cmake --build cmake-out/test -j9 --config Release + # else + # echo "Fail unsupport OS selection ${{ matrix.os }}" + # exit 1 + # fi + + # elf="cmake-out/test/size_test" + + # # Dump basic info + # ls -al ${elf} + # ${toolchain_prefix}size ${elf} + + # # Dump symbol + # python .github/scripts/run_nm.py -e ${elf} + # python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}" + # python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}" + + # # Add basic guard - TODO: refine this! + # ${toolchain_prefix}strip ${elf} + # output=$(ls -la ${elf}) + # arr=($output) + # size=${arr[4]} + # echo "size: $size, threshold: $threshold" + # if [[ "$size" -le "$threshold" ]]; then + # echo "Success $size <= $threshold" + # else + # echo "Fail $size > $threshold" + # exit 1 + # fi + + # nxp-build-test: + # name: nxp-build-test + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # # Build + # cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out . + # cmake --build cmake-out --target executorch_delegate_neutron --config Release + + # # Build check for the neutron backend library + # lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a" + # if [ -f $lib_neutron ]; then + # echo "Neutron backend library built." + # else + # echo "Neutron backend library not found!" 
+ # exit 1 + # fi + + # test-coreml-delegate: + # name: test-coreml-delegate + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-latest-xlarge + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # BUILD_TOOL=cmake - test-llama-torchao-lowbit: - name: test-llama-torchao-lowbit - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - bash .ci/scripts/setup-conda.sh - eval "$(conda shell.bash hook)" + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh - # Install requirements - ${CONDA_RUN} python install_executorch.py - ${CONDA_RUN} sh examples/models/llama/install_requirements.sh - - # Run test - ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh - - test-llama-runner-linux: - # Test Both linux x86 and linux aarch64 - name: test-llama-runner-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - mode: [portable, xnnpack+custom] - runner: [linux.2xlarge, linux.arm64.2xlarge] - docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] - include: - - dtype: bf16 - mode: portable - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - - dtype: bf16 - mode: portable - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - dtype: bf16 - mode: custom - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - # Excluding specific runner + docker image combinations that don't make sense: - # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) - # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) - exclude: - - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - fail-fast: false - with: - runner: ${{ matrix.runner }} - docker-image: ${{ matrix.docker-image }} - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - DTYPE=${{ matrix.dtype }} - BUILD_TOOL="cmake" - MODE=${{ matrix.mode }} - ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" - ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" 
-upload "${ARTIFACTS_DIR_NAME}" - - test-llama-runner-macos: - name: test-llama-runner-mac - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - dtype: [fp32] - mode: [mps, coreml, xnnpack+custom+quantize_kv] - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | + # test-static-llama-ane: + # name: test-static-llama-ane + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # set -eux + # bash .ci/scripts/setup-conda.sh + # eval "$(conda shell.bash hook)" - DTYPE=${{ matrix.dtype }} - MODE=${{ matrix.mode }} + # # Install requirements + # ${CONDA_RUN} sh install_requirements.sh + # ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh + # ${CONDA_RUN} python install_executorch.py + # ${CONDA_RUN} sh examples/models/llama/install_requirements.sh - bash .ci/scripts/setup-conda.sh + # # Test ANE llama + # ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh - # Setup executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake + # test-llama-torchao-lowbit: + # name: test-llama-torchao-lowbit + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # set -eux + # bash .ci/scripts/setup-conda.sh + # eval "$(conda shell.bash hook)" + + # # Install requirements + # ${CONDA_RUN} python install_executorch.py + # ${CONDA_RUN} sh examples/models/llama/install_requirements.sh + + # # Run test + # ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh + + # test-llama-runner-linux: + # # Test Both linux x86 and linux aarch64 + # name: test-llama-runner-linux + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # mode: [portable, xnnpack+custom] + # runner: [linux.2xlarge, linux.arm64.2xlarge] + # docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] + # include: + # - dtype: bf16 + # mode: portable + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # - dtype: bf16 + # mode: portable + # runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # - dtype: bf16 + # mode: custom + # runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # # Excluding specific runner + docker image combinations that don't make sense: + # # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + # exclude: + # - runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # - runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # fail-fast: false + # with: + # runner: ${{ matrix.runner }} + # docker-image: ${{ matrix.docker-image }} + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || 
github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # DTYPE=${{ matrix.dtype }} + # BUILD_TOOL="cmake" + # MODE=${{ matrix.mode }} + # ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" + # ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" + + # # Setup executorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + + # test-llama-runner-macos: + # name: test-llama-runner-mac + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # dtype: [fp32] + # mode: [mps, coreml, xnnpack+custom+quantize_kv] + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | - if [[ "${MODE}" == "coreml" ]]; then - # Install coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - fi + # DTYPE=${{ matrix.dtype }} + # MODE=${{ matrix.mode }} - # Install requirements for export_llama - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + # bash .ci/scripts/setup-conda.sh - # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. - # test-llava-runner-macos: - # name: test-llava-runner-macos + # # Setup executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake + + # if [[ "${MODE}" == "coreml" ]]; then + # # Install coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." + # fi + + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + + # # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. 
+ # # test-llava-runner-macos: + # # name: test-llava-runner-macos + # # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # # strategy: + # # fail-fast: false + # # with: + # # runner: macos-14-xlarge + # # python-version: '3.11' + # # submodules: 'recursive' + # # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # # timeout: 900 + # # script: | + # # BUILD_TOOL=cmake + + # # bash .ci/scripts/setup-conda.sh + # # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + + # # # install Llava requirements + # # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh + + # # # run python unittest + # # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava + + # # # run e2e (export, tokenizer and runner) + # # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh + + # test-qnn-model: + # name: test-qnn-model + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + # test-qnn-optimum-model: + # name: test-qnn-optimum-model + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + # test-apple-model: + # name: test-apple-model # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main # strategy: # fail-fast: false # with: - # runner: macos-14-xlarge + # runner: macos-m1-stable # python-version: '3.11' # submodules: 'recursive' # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # timeout: 
900 + # timeout: 90 # script: | # BUILD_TOOL=cmake # bash .ci/scripts/setup-conda.sh - # # Setup MacOS dependencies as there is no Docker support on MacOS atm - # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # # install Llava requirements - # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh - - # # run python unittest - # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava - - # # run e2e (export, tokenizer and runner) - # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh - - test-qnn-model: - name: test-qnn-model - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - - test-qnn-optimum-model: - name: test-qnn-optimum-model - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - - test-apple-model: - name: test-apple-model + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." 
+ + # # Build and test coreml model + # MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) + # for MODEL_NAME in "${MODELS[@]}"; do + # echo "::group::Exporting coreml model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" + # echo "::endgroup::" + + # echo "::group::Exporting mps model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" + # echo "::endgroup::" + # done + + test-coreml-optimum-executorch: + name: test-coreml-optimum-executorch uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: fail-fast: false @@ -579,224 +612,233 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 script: | - BUILD_TOOL=cmake - + set -eux bash .ci/scripts/setup-conda.sh + eval "$(conda shell.bash hook)" - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - - # Build and test coreml model - MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) - for MODEL_NAME in "${MODELS[@]}"; do - echo "::group::Exporting coreml model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" - echo "::endgroup::" + # Install requirements + ${CONDA_RUN} python install_executorch.py - echo "::group::Exporting mps model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" - echo "::endgroup::" - done + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - test-huggingface-transformers: - # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway - if: ${{ !github.event.pull_request.head.repo.fork }} - name: test-huggingface-transformers - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - secrets: inherit - strategy: - matrix: - hf_model_id: [ - google/gemma-3-1b-it, - Qwen/Qwen3-0.6B, - HuggingFaceTB/SmolLM2-135M, - meta-llama/Llama-3.2-1B, - allenai/OLMo-1B-hf, - ] - fail-fast: false - with: - secrets-env: EXECUTORCH_HF_TOKEN - runner: linux.2xlarge.memory - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - upload-artifact: profiling-artifacts-${{ strategy.job-index }} - script: | - echo "::group::Set up ExecuTorch" - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - # Build executor_runner with ETdump enabled - PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DEXECUTORCH_ENABLE_LOGGING=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - 
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -Bcmake-out . - cmake --build cmake-out -j16 --target install --config Release - echo "::endgroup::" + pip list echo "::group::Set up Hugging Face" pip install -U "huggingface_hub[cli]" - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) git clone https://github.com/huggingface/optimum-executorch pushd optimum-executorch # There is no release yet, for CI stability, always test from the same commit on main - git checkout $OPTIMUM_ET_COMMIT - python install_dev.py --skip_override_torch - popd - pip list - echo "::endgroup::" - - echo "::group::Export to ExecuTorch" - # Pass matrix variable as environment variable - export MODEL_ID="${{ matrix.hf_model_id }}" - export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" - pushd optimum-executorch - - ARGS=( - "--model" "${MODEL_ID}" - "--task" "text-generation" - "--recipe" "xnnpack" - "--use_custom_sdpa" - "--use_custom_kv_cache" - "--qlinear" - "--qembedding" - "--output_dir" "${OUTPUT_DIR}" - ) - - optimum-cli export executorch "${ARGS[@]}" - - ls -FlAGhp ${OUTPUT_DIR} - popd - echo "::endgroup::" - - echo "::group::Inference using python API" - pushd optimum-executorch - python -c " - import os - from optimum.executorch import ExecuTorchModelForCausalLM - from transformers import AutoTokenizer - - model_id = os.getenv('MODEL_ID') - pte_dir = os.getenv('OUTPUT_DIR') - print(f'Loading model {model_id} from {pte_dir}.') - model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) - generated_text = model.text_generation( - tokenizer=AutoTokenizer.from_pretrained(model_id), - prompt='Simply put, the theory of relativity states that', - max_seq_len=64 - ) - print(generated_text) - " + # git checkout $OPTIMUM_ET_COMMIT + gh pr checkout 93 + ${CONDA_RUN} python install_dev.py --skip_override_torch popd + ${CONDA_RUN} pip list echo "::endgroup::" - echo "::group::Inference using executor_runner with ETDump" - ./cmake-out/executor_runner \ - --model_path ${OUTPUT_DIR}/model.pte \ - --etdump_path ${OUTPUT_DIR}/etdump.etdp - - export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv - mkdir -p $(dirname "$TSV_PATH") - python3 -m devtools.inspector.inspector_cli \ - --etdump_path ${OUTPUT_DIR}/etdump.etdp \ - --tsv_path ${TSV_PATH} - - echo "::endgroup::" - + MODELS=(smollm llama3) + for MODEL_NAME in "${MODELS[@]}"; do + ${CONDA_RUN} bash .ci/scripts/export_optimum_et_coreml.sh "${MODEL_NAME}" "/tmp/${MODEL_NAME}" + done - test-llama-runner-qnn-linux: - name: test-llama-runner-qnn-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - pt2e_quantize: [qnn_16a16w, qnn_8a8w] - mode: [qnn] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - DTYPE=${{ matrix.dtype }} - MODE=${{ 
matrix.mode }} - PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} - - ./install_requirements.sh --use-pt-pinned-commit - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" - - unittest-release: - uses: ./.github/workflows/_unittest.yml - permissions: - id-token: write - contents: read - with: - build-mode: Release - build-tool: cmake - docker-image: executorch-ubuntu-22.04-clang12 - - unittest-nxp-neutron: - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - set -eux + # test-huggingface-transformers: + # # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway + # if: ${{ !github.event.pull_request.head.repo.fork }} + # name: test-huggingface-transformers + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # secrets: inherit + # strategy: + # matrix: + # hf_model_id: [ + # google/gemma-3-1b-it, + # Qwen/Qwen3-0.6B, + # HuggingFaceTB/SmolLM2-135M, + # meta-llama/Llama-3.2-1B, + # allenai/OLMo-1B-hf, + # ] + # fail-fast: false + # with: + # secrets-env: EXECUTORCH_HF_TOKEN + # runner: linux.2xlarge.memory + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # upload-artifact: profiling-artifacts-${{ strategy.job-index }} + # script: | + # echo "::group::Set up ExecuTorch" + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # # Build executor_runner with ETdump enabled + # PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \ + # -DCMAKE_INSTALL_PREFIX=cmake-out \ + # -DEXECUTORCH_ENABLE_LOGGING=1 \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + # -DEXECUTORCH_BUILD_XNNPACK=ON \ + # -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + # -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + # -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + # -Bcmake-out . 
+ # cmake --build cmake-out -j16 --target install --config Release + # echo "::endgroup::" + + # echo "::group::Set up Hugging Face" + # pip install -U "huggingface_hub[cli]" + # huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN + # OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) + # git clone https://github.com/huggingface/optimum-executorch + # pushd optimum-executorch + # # There is no release yet, for CI stability, always test from the same commit on main + # git checkout $OPTIMUM_ET_COMMIT + # python install_dev.py --skip_override_torch + # popd + # pip list + # echo "::endgroup::" + + # echo "::group::Export to ExecuTorch" + # # Pass matrix variable as environment variable + # export MODEL_ID="${{ matrix.hf_model_id }}" + # export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" + # pushd optimum-executorch + + # ARGS=( + # "--model" "${MODEL_ID}" + # "--task" "text-generation" + # "--recipe" "xnnpack" + # "--use_custom_sdpa" + # "--use_custom_kv_cache" + # "--qlinear" + # "--qembedding" + # "--output_dir" "${OUTPUT_DIR}" + # ) + + # optimum-cli export executorch "${ARGS[@]}" + + # ls -FlAGhp ${OUTPUT_DIR} + # popd + # echo "::endgroup::" + + # echo "::group::Inference using python API" + # pushd optimum-executorch + # python -c " + # import os + # from optimum.executorch import ExecuTorchModelForCausalLM + # from transformers import AutoTokenizer + + # model_id = os.getenv('MODEL_ID') + # pte_dir = os.getenv('OUTPUT_DIR') + # print(f'Loading model {model_id} from {pte_dir}.') + # model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) + # generated_text = model.text_generation( + # tokenizer=AutoTokenizer.from_pretrained(model_id), + # prompt='Simply put, the theory of relativity states that', + # max_seq_len=64 + # ) + # print(generated_text) + # " + # popd + # echo "::endgroup::" + + # echo "::group::Inference using executor_runner with ETDump" + # ./cmake-out/executor_runner \ + # --model_path ${OUTPUT_DIR}/model.pte \ + # --etdump_path ${OUTPUT_DIR}/etdump.etdp + + # export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv + # mkdir -p $(dirname "$TSV_PATH") + # python3 -m devtools.inspector.inspector_cli \ + # --etdump_path ${OUTPUT_DIR}/etdump.etdp \ + # --tsv_path ${TSV_PATH} + + # echo "::endgroup::" + + + # test-llama-runner-qnn-linux: + # name: test-llama-runner-qnn-linux + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # pt2e_quantize: [qnn_16a16w, qnn_8a8w] + # mode: [qnn] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # DTYPE=${{ matrix.dtype }} + # MODE=${{ matrix.mode }} + # PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} + + # ./install_requirements.sh --use-pt-pinned-commit + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + + # # Setup executorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Install requirements for export_llama + # 
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" + + # unittest-release: + # uses: ./.github/workflows/_unittest.yml + # permissions: + # id-token: write + # contents: read + # with: + # build-mode: Release + # build-tool: cmake + # docker-image: executorch-ubuntu-22.04-clang12 + + # unittest-nxp-neutron: + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # set -eux - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - # Build and install Executorch - PYTHON_EXECUTABLE=python \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \ - .ci/scripts/setup-linux.sh --build-tool "cmake" + # # Build and install Executorch + # PYTHON_EXECUTABLE=python \ + # CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \ + # .ci/scripts/setup-linux.sh --build-tool "cmake" - # Install test requirements - pip install -r backends/nxp/requirements-tests.txt + # # Install test requirements + # pip install -r backends/nxp/requirements-tests.txt - # Run pytest - PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh + # # Run pytest + # PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
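
Test plan: a minimal local sketch of the new export path, assuming a macOS checkout where the setup steps from test-coreml-optimum-executorch (install_executorch.py, then optimum-executorch from PR 93) have already been run; the model names and output locations below mirror the job's CI loop:

  for MODEL_NAME in smollm llama3; do
    bash .ci/scripts/export_optimum_et_coreml.sh "${MODEL_NAME}" "/tmp/${MODEL_NAME}"
    ls -FlAGhp "/tmp/${MODEL_NAME}"  # the exported Core ML .pte artifact(s) should land here
  done

The case statement in the export script keeps the short CI model names decoupled from the Hugging Face model IDs and recipes, so covering another model in CI only requires a new case arm plus an entry in the job's MODELS list.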