From 157a8afe1428129f3041a5d36adc221fcb88ceef Mon Sep 17 00:00:00 2001
From: Chen Lai <chenlai@fb.com>
Date: Thu, 2 Oct 2025 17:46:05 -0700
Subject: [PATCH] debug

---
 .github/workflows/pull.yml  | 2110 ++++++++++++++++-----------------
 .github/workflows/trunk.yml | 2226 +++++++++++++++++------------------
 2 files changed, 2168 insertions(+), 2168 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index c15fadd102f..886008b6bd8 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1,1055 +1,1055 @@
-name: pull
-
-on:
-  pull_request:
-  push:
-    branches:
-      - main
-      - release/*
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
-  cancel-in-progress: true
-
-jobs:
-  test-qnn-wheel-packages-linux:
-    name: test-qnn-wheel-packages-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [ "3.10", "3.11", "3.12" ]
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Create a clean env for each python version
-        conda create -y -n test_env_${{ matrix.python-version }} python=${{ matrix.python-version }}
-        conda activate test_env_${{ matrix.python-version }}
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_wheel_package_qnn.sh "${{ matrix.python-version }}"
-
-  test-setup-linux-gcc:
-    name: test-setup-linux-gcc
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Build and test ExecuTorch with the add model on portable backend.
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable"
-
-  test-models-linux-basic:
-    name: test-models-linux-basic
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [mv3, vit]
-        backend: [portable, xnnpack-quantization-delegation]
-        build-tool: [cmake, buck2]
-        runner: [linux.2xlarge, linux.arm64.2xlarge]
-        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
-        # Excluding specific runner + docker image combinations that don't make sense:
-        #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
-        #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
-        exclude:
-          - runner: linux.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-          - runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-clang12
-          # TODO: Need to figure out why buck2 doesnt work on Graviton instances.
-          - runner: linux.arm64.2xlarge
-            build-tool: buck2
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:${{ matrix.docker-image }}
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=${{ matrix.build-tool }}
-        BACKEND=${{ matrix.backend }}
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Build and test ExecuTorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-models-linux:
-    name: test-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [linear, add, add_mul, ic3, mv2, resnet18, resnet50, mobilebert, emformer_transcribe]
-        backend: [portable, xnnpack-quantization-delegation]
-        runner: [linux.2xlarge]
-        include:
-          - model: ic4
-            backend: portable
-            runner: linux.4xlarge.memory
-          - model: ic4
-            backend: xnnpack-quantization-delegation
-            runner: linux.4xlarge.memory
-          - model: emformer_join
-            backend: portable
-            runner: linux.4xlarge.memory
-          - model: emformer_join
-            backend: xnnpack-quantization-delegation
-            runner: linux.4xlarge.memory
-          - model: phi_4_mini
-            backend: portable
-            runner: linux.4xlarge.memory
-          - model: llama3_2_vision_encoder
-            backend: portable
-            runner: linux.4xlarge.memory
-          - model: w2l
-            backend: portable
-            runner: linux.4xlarge.memory
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=cmake
-        BACKEND=${{ matrix.backend }}
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Build and test ExecuTorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-llama-runner-linux:
-    # Test Both linux x86 and linux aarch64
-    name: test-llama-runner-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        mode: [xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
-        runner: [linux.2xlarge, linux.arm64.2xlarge]
-        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
-        include:
-          - dtype: bf16
-            mode: custom
-            runner: linux.2xlarge
-            docker-image: executorch-ubuntu-22.04-clang12
-        # Excluding specific runner + docker image combinations that don't make sense:
-        #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
-        #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
-        exclude:
-          - runner: linux.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-          - runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-clang12
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:${{ matrix.docker-image }}
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        DTYPE=${{ matrix.dtype }}
-        BUILD_TOOL="cmake"
-        MODE=${{ matrix.mode }}
-        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
-        ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
-
-  test-llama-runner-linux-android:
-    name: test-llama-runner-linux-android
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python \
-        bash .ci/scripts/build_llama_android.sh  "${BUILD_TOOL}"
-
-  test-custom-ops-linux:
-    name: test-custom-ops-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Test custom ops
-        PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
-
-  test-selective-build-linux:
-    name: test-selective-build-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Test selective build
-        PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
-
-  test-multimodal-linux:
-    if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-multimodal-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    secrets: inherit
-    strategy:
-      fail-fast: false
-      matrix:
-        model: ["gemma3-4b"]  # llava gives segfault so not covering.
-    with:
-      secrets-env: EXECUTORCH_HF_TOKEN
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        echo "::group::Setup ExecuTorch"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-        echo "::endgroup::"
-
-        echo "::group::Setup Huggingface"
-        pip install -U "huggingface_hub[cli]" accelerate
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
-        echo "::endgroup::"
-
-        echo "::group::Test ${{ matrix.model }}"
-        python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
-        echo "::endgroup::"
-
-  test-moshi-linux:
-    name: test-moshi-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # install Mimi requirements
-        bash examples/models/moshi/mimi/install_requirements.sh
-
-        # reinstall executorch
-        bash ./install_executorch.sh --minimal
-
-        # run python unittest
-        python -m unittest examples.models.moshi.mimi.test_mimi
-
-  test-quantized-aot-lib-linux:
-    name: test-quantized-aot-lib-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
-
-  test-binary-size-linux-gcc:
-    name: test-binary-size-linux-gcc
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        ./install_requirements.sh --use-pt-pinned-commit
-        # build module for executorch.extension.pybindings.portable_lib
-        bash test/build_size_test.sh
-        strip cmake-out/test/size_test
-        output=$(ls -la cmake-out/test/size_test)
-        arr=($output)
-        size=${arr[4]}
-        # threshold=48120 on devserver with gcc11.4
-        # todo(lfq): update once binary size is below 50kb.
-        threshold="63776"
-        if [[ "$size" -le "$threshold" ]]; then
-          echo "Success $size <= $threshold"
-        else
-          echo "Fail $size > $threshold"
-          exit 1
-        fi
-
-  test-binary-size-linux:
-    name: test-binary-size-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        ./install_requirements.sh --use-pt-pinned-commit
-
-        # build module for executorch.extension.pybindings.portable_lib
-        bash test/build_size_test.sh
-        strip cmake-out/test/size_test
-        output=$(ls -la cmake-out/test/size_test)
-        arr=($output)
-        size=${arr[4]}
-        threshold="51752"
-        if [[ "$size" -le "$threshold" ]]; then
-          echo "Success $size <= $threshold"
-        else
-          echo "Fail $size > $threshold"
-          exit 1
-        fi
-
-  android:
-    uses: ./.github/workflows/_android.yml
-    permissions:
-      id-token: write
-      contents: read
-
-  unittest:
-    uses: ./.github/workflows/_unittest.yml
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      build-mode: Debug
-      build-tool: cmake
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-
-  unittest-editable:
-    uses: ./.github/workflows/_unittest.yml
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      build-mode: Debug
-      build-tool: cmake
-      editable: true
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-
-  unittest-buck:
-    uses: ./.github/workflows/_unittest.yml
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      build-mode: Debug
-      build-tool: buck2
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-
-  unittest-arm-backend-with-no-fvp:
-    name: unittest-arm-backend-with-no-fvp
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        include:
-          - test_arm_baremetal: test_pytest_ops
-          - test_arm_baremetal: test_pytest_models
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-
-        .ci/scripts/setup-arm-baremetal-tools.sh
-
-        ARM_TEST=${{ matrix.test_arm_baremetal }}
-
-        # Test test_arm_baremetal.sh with test
-        backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
-
-  test-llama-runner-qnn-linux:
-    name: test-llama-runner-qnn-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
-        mode: [qnn]
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        DTYPE=${{ matrix.dtype }}
-        BUILD_TOOL="cmake"
-        MODE=${{ matrix.mode }}
-        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
-
-        ./install_requirements.sh --use-pt-pinned-commit
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
-
-  test-static-llama-qnn-linux:
-    name: test-static-llama-qnn-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # Setup install_requirements for llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-
-        # Test static llama weight sharing and accuracy
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
-
-  test-qnn-models-linux:
-    name: test-qnn-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
-        # reminder: make sure each job runs fast
-
-  test-phi-3-mini-runner-linux:
-    name: test-phi-3-mini-runner-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # install phi-3-mini requirements
-        bash examples/models/phi-3-mini/install_requirements.sh
-
-        # run e2e (export, tokenizer and runner)
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release
-
-  test-eval_llama-wikitext-linux:
-    name: test-eval_llama-wikitext-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # install llama requirements
-        bash examples/models/llama/install_requirements.sh
-
-        # run eval_llama wikitext task
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
-
-  # TODO(larryliu0820): Fix this issue before reenabling it: https://gist.github.com/larryliu0820/7377ecd0d79dbc06076cec8d9f2b85d2
-  # test-eval_llama-mmlu-linux:
-  #   name: test-eval_llama-mmlu-linux
-  #   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-  #   permissions:
-  #     id-token: write
-  #     contents: read
-  #   strategy:
-  #     fail-fast: false
-  #   with:
-  #     runner: linux.24xlarge
-  #     docker-image: ci-image:executorch-ubuntu-22.04-clang12
-  #     submodules: 'recursive'
-  #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-  #     timeout: 90
-  #     script: |
-  #       # The generic Linux job chooses to use base env, not the one setup by the image
-  #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-  #       conda activate "${CONDA_ENV}"
-
-  #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-  #       # install llama requirements
-  #       bash examples/models/llama/install_requirements.sh
-
-  #       # run eval_llama mmlu task
-  #       PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
-
-  test-llama_runner_eager-linux:
-    name: test-llama_runner_eager-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # install llama requirements
-        bash examples/models/llama/install_requirements.sh
-
-        # run llama runner in eager mode
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
-
-  test-llama-lora-linux:
-    name: test-llama-lora-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # Install llama requirements
-        bash examples/models/llama/install_requirements.sh
-
-        # install a recent version of torchtune (>= 20250730)
-        PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250929  --extra-index-url https://download.pytorch.org/whl/nightly/cpu
-
-        # run llama runner in eager mode
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
-
-  test-mediatek-models-linux:
-    name: test-mediatek-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.24xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-mediatek-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-mediatek-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-mediatek-sdk.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "mv3" "buck2" "mediatek"
-        # placeholder for mediatek to add more tests
-
-  test-openvino-linux:
-    name: test-openvino-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    if: false # TODO Re-enable after fixing timeouts (#14314)
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc9
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-openvino.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_openvino.sh
-
-  test-build-wasm-linux:
-    name: test-build-wasm-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # Install Node.js and Emscripten
-        source .ci/scripts/setup-emscripten.sh
-
-        # Test selective build
-        PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
-
-  unittest-wasm-bindings:
-    name: unittest-wasm-bindings
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        enable-etdump: ['', '--enable-etdump']
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-
-        # Install Node.js and Emscripten
-        source .ci/scripts/setup-emscripten.sh
-
-        # Test selective build
-        bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
-
-        # Install Jest
-        cd cmake-out-wasm/extension/wasm/test
-        npm install --save-dev jest
-
-        # Run unit test
-        npm test
-
-  unittest-nxp-neutron:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        set -eux
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Build and install Executorch
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
-        .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # Install test requirements
-        pip install -r backends/nxp/requirements-tests-pypi.txt
-        pip install -r backends/nxp/requirements-tests-eiq.txt
-
-        # Run pytest
-        PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
-
-        # Run aot examples:
-        PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh cifar10
-        PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
-
-
-  test-samsung-models-linux:
-    name: test-samsung-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    secrets: inherit
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      secrets-env: SAMSUNG_AI_LITECORE_KEY
-      script: |
-        set -ex
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Setup python
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # Setup Samsung SDK (AI Lite Core) and install enn backend
-        export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
-        source .ci/scripts/setup-samsung-linux-deps.sh
-
-        # Test models serially
-        models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l"
-        for model in $models; do
-          python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
-        done
-
-        # Test ops
-        python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
-
-
-  test-vulkan-models-linux:
-    name: test-vulkan-models-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        set -eux
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
-        source .ci/scripts/setup-vulkan-linux-deps.sh
-
-        # Setup python
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
-        .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh --build
-
-        # Test models serially
-        models="mv2 mv3 edsr resnet18 resnet50 dl3"
-        for model in $models; do
-          python -m examples.vulkan.export --model_name=$model --test
-        done
-
-
-  test-vulkan-operators-linux:
-    name: test-vulkan-operators-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        set -eux
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
-        source .ci/scripts/setup-vulkan-linux-deps.sh
-
-        # Setup python
-        PYTHON_EXECUTABLE=python \
-        CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
-        .ci/scripts/setup-linux.sh --build-tool "cmake"
-
-        # Custom operator tests
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
-        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
-        ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
-        ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
-        ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
-        ./cmake-out/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations
-        ./cmake-out/backends/vulkan/test/custom_ops/q8ta_q8ta_q8to_add
-
-        # "Classic" Operator tests
-        PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_op.sh --build
-        # TODO(ssjia): figure out how to run custom op tests in CI. Currently, they are
-        # failing due to to the libstdc++.so.6 installed with conda not supporting
-        # GLIBCXX_3.4.30. These tests are still run in Meta internal CI.
-        # ./cmake-out/backends/vulkan/test/op_tests/vulkan_sdpa_test
-
-        # Run e2e testing for selected operators. More operators will be tested via this
-        # route in the future.
-        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
-        python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
-
-  nxp-build-test:
-    name: nxp-build-test
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Build
-        cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
-        cmake --build cmake-out --target executorch_delegate_neutron --config Release
-
-        # Build check for the neutron backend library
-        lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
-        if [ -f $lib_neutron ]; then
-            echo "Neutron backend library built."
-        else
-            echo "Neutron backend library not found!"
-            exit 1
-        fi
+# name: pull
+
+# on:
+#   pull_request:
+#   push:
+#     branches:
+#       - main
+#       - release/*
+#   workflow_dispatch:
+
+# concurrency:
+#   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+#   cancel-in-progress: true
+
+# jobs:
+#   test-qnn-wheel-packages-linux:
+#     name: test-qnn-wheel-packages-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         python-version: [ "3.10", "3.11", "3.12" ]
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 180
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Create a clean env for each python version
+#         conda create -y -n test_env_${{ matrix.python-version }} python=${{ matrix.python-version }}
+#         conda activate test_env_${{ matrix.python-version }}
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_wheel_package_qnn.sh "${{ matrix.python-version }}"
+
+#   test-setup-linux-gcc:
+#     name: test-setup-linux-gcc
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test ExecuTorch with the add model on portable backend.
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable"
+
+#   test-models-linux-basic:
+#     name: test-models-linux-basic
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         model: [mv3, vit]
+#         backend: [portable, xnnpack-quantization-delegation]
+#         build-tool: [cmake, buck2]
+#         runner: [linux.2xlarge, linux.arm64.2xlarge]
+#         docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
+#         # Excluding specific runner + docker image combinations that don't make sense:
+#         #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
+#         #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
+#         exclude:
+#           - runner: linux.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#           - runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-clang12
+#           # TODO: Need to figure out why buck2 doesn't work on Graviton instances.
+#           - runner: linux.arm64.2xlarge
+#             build-tool: buck2
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:${{ matrix.docker-image }}
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         MODEL_NAME=${{ matrix.model }}
+#         BUILD_TOOL=${{ matrix.build-tool }}
+#         BACKEND=${{ matrix.backend }}
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test ExecuTorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
+
+#   test-models-linux:
+#     name: test-models-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         model: [linear, add, add_mul, ic3, mv2, resnet18, resnet50, mobilebert, emformer_transcribe]
+#         backend: [portable, xnnpack-quantization-delegation]
+#         runner: [linux.2xlarge]
+#         include:
+#           - model: ic4
+#             backend: portable
+#             runner: linux.4xlarge.memory
+#           - model: ic4
+#             backend: xnnpack-quantization-delegation
+#             runner: linux.4xlarge.memory
+#           - model: emformer_join
+#             backend: portable
+#             runner: linux.4xlarge.memory
+#           - model: emformer_join
+#             backend: xnnpack-quantization-delegation
+#             runner: linux.4xlarge.memory
+#           - model: phi_4_mini
+#             backend: portable
+#             runner: linux.4xlarge.memory
+#           - model: llama3_2_vision_encoder
+#             backend: portable
+#             runner: linux.4xlarge.memory
+#           - model: w2l
+#             backend: portable
+#             runner: linux.4xlarge.memory
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         MODEL_NAME=${{ matrix.model }}
+#         BUILD_TOOL=cmake
+#         BACKEND=${{ matrix.backend }}
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test ExecuTorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
+
+#   test-llama-runner-linux:
+#     # Test both Linux x86 and Linux aarch64
+#     name: test-llama-runner-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         mode: [xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv]
+#         runner: [linux.2xlarge, linux.arm64.2xlarge]
+#         docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
+#         include:
+#           - dtype: bf16
+#             mode: custom
+#             runner: linux.2xlarge
+#             docker-image: executorch-ubuntu-22.04-clang12
+#         # Excluding specific runner + docker image combinations that don't make sense:
+#         #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
+#         #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
+#         exclude:
+#           - runner: linux.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#           - runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-clang12
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:${{ matrix.docker-image }}
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         DTYPE=${{ matrix.dtype }}
+#         BUILD_TOOL="cmake"
+#         MODE=${{ matrix.mode }}
+#         ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
+#         ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Install requirements for export_llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+#         # Test llama2
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
+
+#   test-llama-runner-linux-android:
+#     name: test-llama-runner-linux-android
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python \
+#         bash .ci/scripts/build_llama_android.sh  "${BUILD_TOOL}"
+
+#   test-custom-ops-linux:
+#     name: test-custom-ops-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Test custom ops
+#         PYTHON_EXECUTABLE=python bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
+
+#   test-selective-build-linux:
+#     name: test-selective-build-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Test selective build
+#         PYTHON_EXECUTABLE=python bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
+
+#   test-multimodal-linux:
+#     if: ${{ !github.event.pull_request.head.repo.fork }}
+#     name: test-multimodal-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     secrets: inherit
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         model: ["gemma3-4b"]  # llava gives segfault so not covering.
+#     with:
+#       secrets-env: EXECUTORCH_HF_TOKEN
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         echo "::group::Setup ExecuTorch"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+#         echo "::endgroup::"
+
+#         echo "::group::Setup Huggingface"
+#         pip install -U "huggingface_hub[cli]" accelerate
+#         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+#         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+#         pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+#         echo "::endgroup::"
+
+#         echo "::group::Test ${{ matrix.model }}"
+#         python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
+#         echo "::endgroup::"
+
+#   test-moshi-linux:
+#     name: test-moshi-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # install Mimi requirements
+#         bash examples/models/moshi/mimi/install_requirements.sh
+
+#         # reinstall executorch
+#         bash ./install_executorch.sh --minimal
+
+#         # run python unittest
+#         python -m unittest examples.models.moshi.mimi.test_mimi
+
+#   test-quantized-aot-lib-linux:
+#     name: test-quantized-aot-lib-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
+
+#   test-binary-size-linux-gcc:
+#     name: test-binary-size-linux-gcc
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         ./install_requirements.sh --use-pt-pinned-commit
+#         # build module for executorch.extension.pybindings.portable_lib
+#         bash test/build_size_test.sh
+#         strip cmake-out/test/size_test
+#         output=$(ls -la cmake-out/test/size_test)
+#         arr=($output)
+#         size=${arr[4]}
+#         # threshold=48120 on devserver with gcc11.4
+#         # todo(lfq): update once binary size is below 50kb.
+#         threshold="63776"
+#         if [[ "$size" -le "$threshold" ]]; then
+#           echo "Success $size <= $threshold"
+#         else
+#           echo "Fail $size > $threshold"
+#           exit 1
+#         fi
+
+#   test-binary-size-linux:
+#     name: test-binary-size-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         ./install_requirements.sh --use-pt-pinned-commit
+
+#         # build module for executorch.extension.pybindings.portable_lib
+#         bash test/build_size_test.sh
+#         strip cmake-out/test/size_test
+#         output=$(ls -la cmake-out/test/size_test)
+#         arr=($output)
+#         size=${arr[4]}
+#         threshold="51752"
+#         if [[ "$size" -le "$threshold" ]]; then
+#           echo "Success $size <= $threshold"
+#         else
+#           echo "Fail $size > $threshold"
+#           exit 1
+#         fi
+
+#   android:
+#     uses: ./.github/workflows/_android.yml
+#     permissions:
+#       id-token: write
+#       contents: read
+
+#   unittest:
+#     uses: ./.github/workflows/_unittest.yml
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       build-mode: Debug
+#       build-tool: cmake
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+
+#   unittest-editable:
+#     uses: ./.github/workflows/_unittest.yml
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       build-mode: Debug
+#       build-tool: cmake
+#       editable: true
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+
+#   unittest-buck:
+#     uses: ./.github/workflows/_unittest.yml
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       build-mode: Debug
+#       build-tool: buck2
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+
+#   unittest-arm-backend-with-no-fvp:
+#     name: unittest-arm-backend-with-no-fvp
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         include:
+#           - test_arm_baremetal: test_pytest_ops
+#           - test_arm_baremetal: test_pytest_models
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+
+#         .ci/scripts/setup-arm-baremetal-tools.sh
+
+#         ARM_TEST=${{ matrix.test_arm_baremetal }}
+
+#         # Run test_arm_baremetal.sh with the test selected by the matrix
+#         backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
+
+#   test-llama-runner-qnn-linux:
+#     name: test-llama-runner-qnn-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+#         mode: [qnn]
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         DTYPE=${{ matrix.dtype }}
+#         BUILD_TOOL="cmake"
+#         MODE=${{ matrix.mode }}
+#         PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+#         ./install_requirements.sh --use-pt-pinned-commit
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Install requirements for export_llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+#         # Test llama2
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
+
+#   test-static-llama-qnn-linux:
+#     name: test-static-llama-qnn-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 180
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+#         # Setup install_requirements for llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
+#         # Test static llama weight sharing and accuracy
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama.sh
+
+#   test-qnn-models-linux:
+#     name: test-qnn-models-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 180
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # placeholder for running test_qnn_delegate.py; a matrix can be used to trigger different jobs (refer to test-llama-runner-qnn-linux)
+#         # reminder: make sure each job runs fast
+
+#   test-phi-3-mini-runner-linux:
+#     name: test-phi-3-mini-runner-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # install phi-3-mini requirements
+#         bash examples/models/phi-3-mini/install_requirements.sh
+
+#         # run e2e (export, tokenizer and runner)
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_phi_3_mini.sh Release
+
+#   test-eval_llama-wikitext-linux:
+#     name: test-eval_llama-wikitext-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # install llama requirements
+#         bash examples/models/llama/install_requirements.sh
+
+#         # run eval_llama wikitext task
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_wikitext.sh
+
+#   # TODO(larryliu0820): Fix this issue before reenabling it: https://gist.github.com/larryliu0820/7377ecd0d79dbc06076cec8d9f2b85d2
+#   # test-eval_llama-mmlu-linux:
+#   #   name: test-eval_llama-mmlu-linux
+#   #   uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#   #   permissions:
+#   #     id-token: write
+#   #     contents: read
+#   #   strategy:
+#   #     fail-fast: false
+#   #   with:
+#   #     runner: linux.24xlarge
+#   #     docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#   #     submodules: 'recursive'
+#   #     ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#   #     timeout: 90
+#   #     script: |
+#   #       # The generic Linux job chooses to use base env, not the one setup by the image
+#   #       CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#   #       conda activate "${CONDA_ENV}"
+
+#   #       PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#   #       # install llama requirements
+#   #       bash examples/models/llama/install_requirements.sh
+
+#   #       # run eval_llama mmlu task
+#   #       PYTHON_EXECUTABLE=python bash .ci/scripts/test_eval_llama_mmlu.sh
+
+#   test-llama_runner_eager-linux:
+#     name: test-llama_runner_eager-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # install llama requirements
+#         bash examples/models/llama/install_requirements.sh
+
+#         # run llama runner in eager mode
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_runner_eager.sh
+
+#   test-llama-lora-linux:
+#     name: test-llama-lora-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # Install llama requirements
+#         bash examples/models/llama/install_requirements.sh
+
+#         # install a recent version of torchtune (>= 20250730)
+#         PYTHON_EXECUTABLE=python python -m pip install torchtune==0.7.0.dev20250929  --extra-index-url https://download.pytorch.org/whl/nightly/cpu
+
+#         # run llama lora test
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama_lora.sh
+
+#   test-mediatek-models-linux:
+#     name: test-mediatek-models-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.24xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-mediatek-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-mediatek-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-mediatek-sdk.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "mv3" "buck2" "mediatek"
+#         # placeholder for mediatek to add more tests
+
+#   test-openvino-linux:
+#     name: test-openvino-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     if: false # TODO Re-enable after fixing timeouts (#14314)
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-gcc9
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-openvino.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_openvino.sh
+
+#   test-build-wasm-linux:
+#     name: test-build-wasm-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+#         # Install Node.js and Emscripten
+#         source .ci/scripts/setup-emscripten.sh
+
+#         # Test wasm build
+#         PYTHON_EXECUTABLE=python bash examples/wasm/test_build_wasm.sh
+
+#   unittest-wasm-bindings:
+#     name: unittest-wasm-bindings
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         enable-etdump: ['', '--enable-etdump']
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+
+#         # Install Node.js and Emscripten
+#         source .ci/scripts/setup-emscripten.sh
+
+#         # Build wasm tests
+#         bash scripts/build_wasm_tests.sh ${{ matrix.enable-etdump }}
+
+#         # Install Jest
+#         cd cmake-out-wasm/extension/wasm/test
+#         npm install --save-dev jest
+
+#         # Run unit test
+#         npm test
+
+#   unittest-nxp-neutron:
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         set -eux
+
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Build and install Executorch
+#         PYTHON_EXECUTABLE=python \
+#         CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \
+#         .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # Install test requirements
+#         pip install -r backends/nxp/requirements-tests-pypi.txt
+#         pip install -r backends/nxp/requirements-tests-eiq.txt
+
+#         # Run pytest
+#         PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
+
+#         # Run aot examples:
+#         PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh cifar10
+#         PYTHON_EXECUTABLE=python bash examples/nxp/run_aot_example.sh mobilenetv2
+
+
+#   test-samsung-models-linux:
+#     name: test-samsung-models-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     secrets: inherit
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12-android
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       secrets-env: SAMSUNG_AI_LITECORE_KEY
+#       script: |
+#         set -ex
+
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Setup python
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # Setup Samsung SDK (AI Lite Core) and install enn backend
+#         export SAMSUNG_AI_LITECORE_KEY=$SECRET_SAMSUNG_AI_LITECORE_KEY
+#         source .ci/scripts/setup-samsung-linux-deps.sh
+
+#         # Test models serially
+#         models="mv2 ic3 resnet18 resnet50 mv3 ic4 dl3 edsr vit w2l"
+#         for model in $models; do
+#           python -m executorch.examples.samsung.aot_compiler --model_name=$model -c E9955
+#         done
+
+#         # Test ops
+#         python -m unittest discover -s backends/samsung/test/ops -p "test_*.py"
+
+
+#   test-vulkan-models-linux:
+#     name: test-vulkan-models-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         set -eux
+
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+#         source .ci/scripts/setup-vulkan-linux-deps.sh
+
+#         # Setup python
+#         PYTHON_EXECUTABLE=python \
+#         CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
+#         .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_model.sh --build
+
+#         # Test models serially
+#         models="mv2 mv3 edsr resnet18 resnet50 dl3"
+#         for model in $models; do
+#           python -m examples.vulkan.export --model_name=$model --test
+#         done
+
+
+#   test-vulkan-operators-linux:
+#     name: test-vulkan-operators-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         set -eux
+
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+#         source .ci/scripts/setup-vulkan-linux-deps.sh
+
+#         # Setup python
+#         PYTHON_EXECUTABLE=python \
+#         CMAKE_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON" \
+#         .ci/scripts/setup-linux.sh --build-tool "cmake"
+
+#         # Custom operator tests
+#         PYTHON_EXECUTABLE=python bash backends/vulkan/test/custom_ops/build_and_run.sh add
+#         ./cmake-out/backends/vulkan/test/custom_ops/q8csw_linear
+#         ./cmake-out/backends/vulkan/test/custom_ops/q8csw_conv2d
+#         ./cmake-out/backends/vulkan/test/custom_ops/q4gsw_linear
+#         ./cmake-out/backends/vulkan/test/custom_ops/choose_qparams_per_row
+#         ./cmake-out/backends/vulkan/test/custom_ops/qdq8ta_conv2d_activations
+#         ./cmake-out/backends/vulkan/test/custom_ops/q8ta_q8ta_q8to_add
+
+#         # "Classic" Operator tests
+#         PYTHON_EXECUTABLE=python bash backends/vulkan/test/scripts/test_op.sh --build
+#         # TODO(ssjia): figure out how to run custom op tests in CI. Currently, they are
+#         # failing due to the libstdc++.so.6 installed with conda not supporting
+#         # GLIBCXX_3.4.30. These tests are still run in Meta internal CI.
+#         # ./cmake-out/backends/vulkan/test/op_tests/vulkan_sdpa_test
+
+#         # Run e2e testing for selected operators. More operators will be tested via this
+#         # route in the future.
+#         python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*pt2e*"
+#         python -m unittest backends/vulkan/test/test_vulkan_delegate.py -k "*torchao*"
+
+#   nxp-build-test:
+#     name: nxp-build-test
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Build
+#         cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out .
+#         cmake --build cmake-out --target executorch_delegate_neutron --config Release
+
+#         # Build check for the neutron backend library
+#         lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a"
+#         if [ -f $lib_neutron ]; then
+#             echo "Neutron backend library built."
+#         else
+#             echo "Neutron backend library not found!"
+#             exit 1
+#         fi
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index ae3001ca920..a354105ea0f 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -1,1113 +1,1113 @@
-name: trunk
-
-on:
-  push:
-    branches:
-      - main
-      - release/*
-    tags:
-      - ciflow/trunk/*
-  pull_request:
-    paths:
-      - .ci/docker/ci_commit_pins/pytorch.txt
-      - .ci/scripts/**
-  workflow_dispatch:
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
-  cancel-in-progress: true
-
-jobs:
-  test-models-macos-cpu:
-    name: test-models-macos-cpu
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        # Mac runners are expensive and limited, and non reliable.
-        # Do some basic testing for macos jobs, and rely mostly on
-        # test-models-linux-aarch64 job instead.
-        model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
-        backend: [xnnpack-quantization-delegation]
-        include:
-          - model: efficient_sam
-            backend: portable
-          - model: llama
-            backend: portable
-          - model: llama3_2_vision_encoder
-            backend: portable
-          - model: mv3
-            backend: portable
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=cmake
-        BACKEND=${{ matrix.backend }}
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test executorch
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-#  test-models-arm-zephyr:
-#    name: test-models-arm-zephyr
-#    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
-#    strategy:
-#      matrix:
-#        model: [add, softmax, mv2]
-#      fail-fast: false
-#    with:
-#      runner: linux.2xlarge
-#      docker-image: ci-image:executorch-ubuntu-22.04-zephyr-sdk
-#      submodules: 'recursive'
-#      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-#      timeout: 120
-#      script: |
-#        MODEL_NAME=${{ matrix.model }}
-#        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-#        conda activate "${CONDA_ENV}"
-#        if [[ ${{ matrix.model}} == "add" ]]; then
-#          SIM_LIMIT_SEC=60
-#        elif [[ ${{ matrix.model}} == "softmax" ]]; then
-#          SIM_LIMIT_SEC=60
-#        elif [[ ${{ matrix.model}} == "mv2" ]]; then
-#          SIM_LIMIT_SEC=5000
-#        else
-#          echo "Failed unsupported model selection ${{ matrix.model }}"
-#          exit 1
-#        fi
-#
-#        source .ci/scripts/utils.sh
-#        source .ci/scripts/zephyr-utils.sh
-#        mkdir -p zephyr_scratch/
-#        cd zephyr_scratch
-#        export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
-#        export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials
-#
-#        # TODO @Bujji: Should see if this can be moved into the docker image itself
-#        download_arm_zephyr_sdk
-#        ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
-#        cd $ZEPHYR_PROJ_ROOT
-#        setup_zephyr_et_module
-#
-#        # Run setup scripts for Arm FVP and Arm AOT Compilation
-#        cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
-#        install_executorch
-#        .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
-#        source examples/arm/ethos-u-scratch/setup_path.sh
-#        source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
-#
-#        # Get the model as PTE
-#        python -m examples.arm.aot_arm_compiler \
-#            --model_name="${MODEL_NAME}" \
-#            --output="${MODEL_NAME}.pte"
-#
-#        # Generate the C-style header
-#        cd $ARM_FVP_TUTORIALS_ROOT
-#        python build_model.py \
-#            --executorch-root $ZEPHYR_PROJ_ROOT/modules/lib/executorch \
-#            --pte-file $ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte \
-#            --output-path $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/src/
-#
-#        cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/
-#
-#        # Build the zephyr elf
-#        west build -p always -b mps3/corstone300/fvp -- \
-#            -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte
-#
-#        # Run the simulation
-#        FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
-#            -C mps3_board.visualisation.disable-visualisation=1 \
-#            -C mps3_board.telnetterminal0.start_telnet=0 \
-#            -C mps3_board.uart0.out_file='sim.out'  \
-#            -C cpu0.CFGITCMSZ=15 \
-#            -C cpu0.CFGDTCMSZ=15 \
-#            --simlimit ${SIM_LIMIT_SEC}
-#
-#        # Disable exit on error
-#        set +e
-#        # Report failure if any of the ouptut verification checks fail
-#        grep -qF "ERROR" sim.out
-#        exit_status=$? #store 0 if found (failure), 1 if not (success)
-#        if [[ "$exit_status" -eq "0" ]]; then
-#            cat sim.out
-#            set -e
-#            exit 1
-#        fi
-#
-#        # Report fail if simulation does not complete successfully
-#        grep -qF "SUCCESS: Program complete, exiting." sim.out
-#        exit_status=$? #store 0 if found (success), 1 if not (failure)
-#        if [[ "$exit_status" -eq "1" ]]; then
-#            cat sim.out
-#            set -e
-#            exit 1
-#        fi
-#        # Re-enable exit on error
-#        set -e
-
-  test-models-linux-aarch64:
-    name: test-models-linux-aarch64
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
-        backend: [portable, xnnpack-quantization-delegation]
-        runner: [linux.arm64.2xlarge]
-        include:
-          - model: lstm
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: mul
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: softmax
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: phi_4_mini
-            backend: portable
-            runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5_1_5b
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: llama3_2_vision_encoder
-            backend: portable
-            runner: linux.arm64.2xlarge
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:executorch-ubuntu-22.04-gcc11-aarch64
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL="cmake"
-        BACKEND=${{ matrix.backend }}
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Build and test ExecuTorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-custom-ops-macos:
-    name: test-custom-ops-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      script: |
-        BUILD_TOOL=${{ matrix.build-tool }}
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test custom ops
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
-
-  test-selective-build-macos:
-    name: test-selective-build-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      script: |
-        BUILD_TOOL=${{ matrix.build-tool }}
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test selective build
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
-
-  test-demo-backend-delegation:
-    name: test-demo-backend-delegation
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        include:
-          - build-tool: buck2
-          - build-tool: cmake
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL=${{ matrix.build-tool }}
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Test selective build
-        PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
-
-  test-arm-backend:
-    name: test-arm-backend
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        include:
-          - test_arm_baremetal: test_pytest_ops_ethosu_fvp
-          - test_arm_baremetal: test_pytest_models_ethosu_fvp
-          - test_arm_baremetal: test_run_ethosu_fvp
-          - test_arm_baremetal: test_models_tosa
-          - test_arm_baremetal: test_models_ethos-u55
-          - test_arm_baremetal: test_models_ethos-u85
-          - test_arm_baremetal: test_smaller_stories_llama
-      fail-fast: false
-    with:
-      runner: linux.2xlarge.memory
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 120
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-
-        .ci/scripts/setup-arm-baremetal-tools.sh
-
-        # Increase number of files user can monitor to bypass buck failures.
-        # Hopefully this is high enough for this setup.
-        sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
-
-        ARM_TEST=${{ matrix.test_arm_baremetal }}
-
-        # Test test_arm_baremetal.sh with test
-        backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
-
-  test-arm-cortex-m-size-test:
-    name: test-arm-cortex-m-size-test
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        os: [bare_metal, zephyr-preset]
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
-        setup_script_args=""
-        if [[ ${{ matrix.os}} == "bare_metal" ]]; then
-          toolchain_prefix=arm-none-eabi-
-          threshold="110592" # 108 KiB
-          toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
-        elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
-          setup_script_args="--target-toolchain zephyr"
-          toolchain_prefix=arm-zephyr-eabi-
-          threshold="135168" # 132 KiB
-          toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
-        else
-          echo "Fail unsupport OS selection ${{ matrix.os }}"
-          exit 1
-        fi
-
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-        .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args}
-        source examples/arm/ethos-u-scratch/setup_path.sh
-
-        # User toolchain
-        ${toolchain_prefix}c++ --version
-
-        # Setup cmake target to desired toolchain
-        toolchain_cmake=$(realpath ${toolchain_cmake})
-
-        # Build and run size test
-        if [[ ${{ matrix.os}} == "bare_metal" ]]; then
-          bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
-        elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
-          CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
-          cmake --build cmake-out -j9 --target install --config Release
-          CXXFLAGS=${cxx_flags}  cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
-          cmake --build cmake-out/test -j9 --config Release
-        else
-          echo "Fail unsupport OS selection ${{ matrix.os }}"
-          exit 1
-        fi
-
-        elf="cmake-out/test/size_test"
-
-        # Dump basic info
-        ls -al ${elf}
-        ${toolchain_prefix}size ${elf}
-
-        # Dump symbol
-        python .github/scripts/run_nm.py -e ${elf}
-        python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}"
-        python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}"
-
-        # Add basic guard - TODO: refine this!
-        ${toolchain_prefix}strip ${elf}
-        output=$(ls -la ${elf})
-        arr=($output)
-        size=${arr[4]}
-        echo "size: $size, threshold: $threshold"
-        if [[ "$size" -le "$threshold" ]]; then
-          echo "Success $size <= $threshold"
-        else
-          echo "Fail $size > $threshold"
-          exit 1
-        fi
-
-  test-arm-ootb-linux:
-    name: test-arm-ootb-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Follow the steps required before running the notebooks
-        # Try to mirror these as closely as possible
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-
-        .ci/scripts/setup-arm-baremetal-tools.sh
-        source examples/arm/ethos-u-scratch/setup_path.sh
-
-        # Install requirements for converting notebooks
-        pip install notebook
-
-        # Run OOTB tests
-        backends/arm/test/test_arm_ootb.sh
-
-  test-coreml-delegate:
-    name: test-coreml-delegate
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    with:
-      runner: macos-14-xlarge
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        BUILD_TOOL=cmake
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test coreml delegate
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
-
-  test-static-llama-ane:
-    name: test-static-llama-ane
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      script: |
-        set -eux
-        bash .ci/scripts/setup-conda.sh
-        eval "$(conda shell.bash hook)"
-
-        # Install requirements
-        ${CONDA_RUN} sh install_requirements.sh
-        ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh
-        ${CONDA_RUN} python install_executorch.py
-        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
-
-        # Test ANE llama
-        ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
-
-  test-llama-torchao-lowbit:
-    name: test-llama-torchao-lowbit
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      script: |
-        set -eux
-        bash .ci/scripts/setup-conda.sh
-        eval "$(conda shell.bash hook)"
-
-        # Install requirements
-        ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
-        ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
-
-        # Run test
-        ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
-
-  test-llama-runner-linux:
-    # Test Both linux x86 and linux aarch64
-    name: test-llama-runner-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        mode: [portable, xnnpack+custom]
-        runner: [linux.2xlarge, linux.arm64.2xlarge]
-        docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
-        include:
-          - dtype: bf16
-            mode: portable
-            runner: linux.2xlarge
-            docker-image: executorch-ubuntu-22.04-clang12
-          - dtype: bf16
-            mode: portable
-            runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-          - dtype: bf16
-            mode: custom
-            runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-        # Excluding specific runner + docker image combinations that don't make sense:
-        #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
-        #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
-        exclude:
-          - runner: linux.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-          - runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-clang12
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:${{ matrix.docker-image }}
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        DTYPE=${{ matrix.dtype }}
-        BUILD_TOOL="cmake"
-        MODE=${{ matrix.mode }}
-        ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
-        ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
-
-  test-llama-runner-macos:
-    name: test-llama-runner-mac
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        dtype: [fp32]
-        mode: [mps, coreml, xnnpack+custom+quantize_kv]
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-
-        DTYPE=${{ matrix.dtype }}
-        MODE=${{ matrix.mode }}
-
-        bash .ci/scripts/setup-conda.sh
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake
-
-        if [[ "${MODE}" == "coreml" ]]; then
-          # Install coreml delegate
-          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
-          echo "Finishing installing coreml."
-        fi
-
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
-
-  test-torchao-huggingface-checkpoints:
-    name: test-torchao-huggingface-checkpoints
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [qwen3_4b, phi_4_mini]
-        runner: [linux.2xlarge]
-        docker-image: [executorch-ubuntu-22.04-clang12]
-        backend: [xnnpack]
-        include:
-          - model: qwen3_4b
-            runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-            backend: torchao
-          - model: phi_4_mini
-            runner: linux.arm64.2xlarge
-            docker-image: executorch-ubuntu-22.04-gcc11-aarch64
-            backend: torchao
-      fail-fast: false
-    with:
-      runner: ${{ matrix.runner }}
-      docker-image: ci-image:${{ matrix.docker-image }}
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-
-        if [[ "${{ matrix.backend }}" == "torchao" ]]; then
-          BUILD_TORCHAO_EXPERIMENTAL=1 TORCHAO_BUILD_CPU_AARCH64=1 TORCHAO_BUILD_KLEIDIAI=1 TORCHAO_ENABLE_ARM_NEON_DOT=1 TORCHAO_PARALLEL_BACKEND=OPENMP pip install third-party/ao
-        fi
-
-        pip install -U "huggingface_hub[cli]"
-
-        bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.model != 'phi_4_mini' && '--test_with_runner' || '' }}  ${{ matrix.backend == 'torchao' && '--use_torchao_kernels' || '' }}
-
-  test-multimodal-macos:
-    if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-multimodal-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    secrets: inherit
-    strategy:
-      fail-fast: false
-      matrix:
-        model: ["gemma3-4b"] # llava gives segfault so not covering.
-    with:
-      secrets-env: EXECUTORCH_HF_TOKEN
-      runner: macos-15-xlarge
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        echo "::group::Set up ExecuTorch"
-        bash .ci/scripts/setup-conda.sh
-        eval "$(conda shell.bash hook)"
-
-        # Install requirements
-        ${CONDA_RUN} python install_executorch.py
-        echo "::endgroup::"
-
-        echo "::group::Set up Huggingface"
-        ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
-        ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
-        ${CONDA_RUN} pip list
-        echo "::endgroup::"
-
-        echo "::group::Test ${{ matrix.model }}"
-        ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
-        echo "::endgroup::"
-
-  test-qnn-model:
-    name: test-qnn-model
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
-
-  test-qnn-optimum-model:
-    name: test-qnn-optimum-model
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transfomer >= 4.48.0, skip for now
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
-
-  test-models-macos-coreml:
-    name: test-models-macos-coreml
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        model: [dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, mobilebert, mv2, mv3, resnet50, vit, w2l]
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=cmake
-        BACKEND="coreml-pybind"
-
-
-        # Set model specific overrides
-        if [[ "${MODEL_NAME}" == "mobilebert" ]]; then
-          # See https://github.com/pytorch/executorch/issues/12907
-          # mobilebert has nan output on FP16, and high MSE on fp32, so we disable runtime test now
-          BACKEND="coreml"
-        fi
-
-        if [[ "${MODEL_NAME}" == "efficient_sam" ]]; then
-          # See https://github.com/pytorch/executorch/issues/12906
-          # efficient_sam fails to run on CoreML
-          BACKEND="coreml"
-        fi
-
-        bash .ci/scripts/setup-conda.sh
-
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
-        echo "Finishing installing coreml."
-
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-models-macos-mps:
-    name: test-models-macos-mps
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        BUILD_TOOL=cmake
-        bash .ci/scripts/setup-conda.sh
-
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-
-        # Build and test mps model
-        for MODEL_NAME in mv3 ic4 resnet50 edsr mobilebert w2l; do
-          echo "::group::Exporting mps model: $MODEL_NAME"
-          PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
-          echo "::endgroup::"
-        done
-
-  test-huggingface-transformers-xnnpack:
-    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
-    if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-xnnpack
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    secrets: inherit
-    strategy:
-      matrix:
-        config: [
-          # XNNPack.
-          llama3.2-1b|xnnpack|--quantize,
-          qwen3-0.6b|xnnpack|--quantize,
-          qwen3-1.7b|xnnpack|--quantize,
-          gemma3-1b|xnnpack|--quantize,
-          phi4-mini|xnnpack|--quantize,
-          smollm2-135m|xnnpack|--quantize,
-          smollm3-3b|xnnpack|--quantize
-        ]
-      fail-fast: false
-    with:
-      secrets-env: EXECUTORCH_HF_TOKEN
-      runner: linux.2xlarge.memory
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      upload-artifact: profiling-artifacts-${{ strategy.job-index }}
-      script: |
-        set -eux
-        IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
-        echo "Model: $MODEL"
-        echo "Recipe: $RECIPE"
-        echo "Quantize: $QUANTIZE"
-
-        echo "::group::Set up ExecuTorch"
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
-        # Build executor_runner with ETdump enabled
-        PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
-          -DCMAKE_INSTALL_PREFIX=cmake-out \
-          -DEXECUTORCH_ENABLE_LOGGING=1 \
-          -DCMAKE_BUILD_TYPE=Release \
-          -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-          -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-          -DEXECUTORCH_BUILD_XNNPACK=ON \
-          -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-          -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-          -DEXECUTORCH_BUILD_DEVTOOLS=ON \
-          -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
-          -Bcmake-out .
-        cmake --build cmake-out -j16 --target install --config Release
-        echo "::endgroup::"
-
-        echo "::group::Set up Hugging Face"
-        pip install -U "huggingface_hub[cli]"
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        git clone https://github.com/huggingface/optimum-executorch
-        pushd optimum-executorch
-        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout $OPTIMUM_ET_COMMIT
-        python install_dev.py --skip_override_torch
-        popd
-        pip list
-        echo "::endgroup::"
-
-        echo "::group::Run tests"
-        export OUTPUT_DIR="$(pwd)/${MODEL}_${RECIPE}_${QUANTIZE}"
-        python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE} --model_dir ${OUTPUT_DIR}
-        echo "::endgroup::"
-
-        echo "::group::Generate artifacts for performance profiling"
-        ./cmake-out/executor_runner \
-          --model_path ${OUTPUT_DIR}/model.pte \
-          --etdump_path ${OUTPUT_DIR}/etdump.etdp
-
-        export TSV_PATH=artifacts-to-be-uploaded/${MODEL}_op_prof.tsv
-        mkdir -p $(dirname "$TSV_PATH")
-        python3 -m devtools.inspector.inspector_cli \
-          --etdump_path ${OUTPUT_DIR}/etdump.etdp \
-          --tsv_path ${TSV_PATH}
-        echo "::endgroup::"
-
-  test-huggingface-transformers-macos:
-    # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
-    if: ${{ !github.event.pull_request.head.repo.fork }}
-    name: test-huggingface-transformers-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    secrets: inherit
-    # Models below selected based on https://huggingface.co/models?pipeline_tag=text-generation&num_parameters=min:0,max:3B&sort=trending.
-    strategy:
-      matrix:
-        config: [
-          # # XNNPack. (Skipping for now due to intermittent segmentation faults, see https://github.com/huggingface/optimum-executorch/issues/122.)
-          # llama3.2-1b|xnnpack|--quantize,
-          # qwen3-0.6b|xnnpack|--quantize,
-          # qwen3-1.7b|xnnpack|--quantize,
-          # gemma3-1b|xnnpack|--quantize,
-          # phi4-mini|xnnpack|--quantize,
-          # smollm2-135m|xnnpack|--quantize,
-          # smollm3-3b|xnnpack|--quantize,
-          # qwen3-1.7b|xnnpack|--quantize,
-          # CoreML.
-          llama3.2-1b|coreml_fp32_gpu|--quantize,
-          qwen3-0.6b|coreml_fp32_gpu|--quantize,
-          smollm2-135m|coreml_fp32_gpu|--quantize,
-          olmo-1b|coreml_fp32_gpu|--quantize,
-          bert|coreml_fp32_gpu|--quantize,
-          distilbert|coreml_fp32_gpu|--quantize
-        ]
-      fail-fast: false
-    with:
-      secrets-env: EXECUTORCH_HF_TOKEN
-      runner: macos-15-xlarge
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        set -eux
-        IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
-        echo "Model: $MODEL"
-        echo "Recipe: $RECIPE"
-        echo "Quantize: $QUANTIZE"
-
-        echo "::group::Set up ExecuTorch"
-        bash .ci/scripts/setup-conda.sh
-        eval "$(conda shell.bash hook)"
-
-        # Install requirements
-        ${CONDA_RUN} python install_executorch.py
-        echo "::endgroup::"
-
-        echo "::group::Set up Hugging Face"
-        pip install -U "huggingface_hub[cli]"
-        huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
-        OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
-        git clone https://github.com/huggingface/optimum-executorch
-        pushd optimum-executorch
-        # There is no release yet, for CI stability, always test from the same commit on main
-        git checkout $OPTIMUM_ET_COMMIT
-        ${CONDA_RUN} python install_dev.py --skip_override_torch
-        popd
-        ${CONDA_RUN} pip list
-        echo "::endgroup::"
-
-        # Run test
-        ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE}
-
-  test-llama-runner-qnn-linux:
-    name: test-llama-runner-qnn-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        dtype: [fp32]
-        pt2e_quantize: [qnn_16a16w, qnn_8a8w]
-        mode: [qnn]
-      fail-fast: false
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 900
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        BUILD_TOOL="cmake"
-        DTYPE=${{ matrix.dtype }}
-        MODE=${{ matrix.mode }}
-        PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
-
-        ./install_requirements.sh --use-pt-pinned-commit
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Install requirements for export_llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-        # Test llama2
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
-
-  # this is for filtering out the qnn changes such that qnn jobs only triggered when the specific files are changed
-  changes:
-    runs-on: ubuntu-latest
-    outputs:
-      qnn: ${{ steps.filter.outputs.qnn }}
-    steps:
-      - uses: actions/checkout@v4
-      - uses: dorny/paths-filter@v3
-        id: filter
-        with:
-          filters: |
-            qnn:
-              - 'backends/qualcomm/**'
-              - 'examples/qualcomm/**'
-              - 'examples/models/llama/**'
-
-  test-static-llama-qnn-eval-linux:
-    needs: changes # has dependency on changes jobs defined above
-    if: needs.changes.outputs.qnn == 'true'
-    name: test-static-llama-qnn-eval-linux
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-      matrix:
-        config:
-          - name: "baseline"
-            flags: ""
-            threshold: 62.0
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 180
-      script: |
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-        BUILD_TOOL="cmake"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
-        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
-        # Setup executorch
-        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
-        # Setup install_requirements for llama
-        PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
-
-        echo ">>> Running config: ${{ matrix.config.name }}"
-        PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
-          --flags "${{ matrix.config.flags }}" \
-          --threshold "${{ matrix.config.threshold }}"
-
-  unittest-release:
-    uses: ./.github/workflows/_unittest.yml
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      build-mode: Release
-      build-tool: cmake
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-
-  test-mcu-models:
-    name: test-mcu-models
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    strategy:
-      matrix:
-        include:
-          - build-tool: cmake
-      fail-fast: false
-    permissions:
-      id-token: write
-      contents: read
-    with:
-      runner: linux.2xlarge
-      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        BUILD_TOOL=${{ matrix.build-tool }}
-
-        # The generic Linux job chooses to use base env, not the one setup by the image
-        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
-        conda activate "${CONDA_ENV}"
-
-        # Try to mirror these as closely as possible
-        source .ci/scripts/utils.sh
-        install_executorch "--use-pt-pinned-commit"
-
-        .ci/scripts/setup-arm-baremetal-tools.sh
-        source examples/arm/ethos-u-scratch/setup_path.sh
-
-        # Run selective Build
-        chmod +x examples/selective_build/test_selective_build.sh
-        examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
-
-        # Run MCU models
-        chmod +x examples/arm/run_mcu_models_fvp.sh
-        examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
-
-  test-models-windows:
-    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
-    strategy:
-      fail-fast: false
-      matrix:
-        model: [mv3, resnet50, vit, mobilebert, emformer_transcribe]
-        backend: [portable, xnnpack-q8]
-    with:
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 60
-      script: |
-        conda init powershell
-
-        powershell -Command "& {
-          Set-PSDebug -Trace 1
-          \$ErrorActionPreference = 'Stop'
-          \$PSNativeCommandUseErrorActionPreference = \$true
-
-          .ci/scripts/setup-windows.ps1
-
-          .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
-        }"
+# name: trunk
+
+# on:
+#   push:
+#     branches:
+#       - main
+#       - release/*
+#     tags:
+#       - ciflow/trunk/*
+#   pull_request:
+#     paths:
+#       - .ci/docker/ci_commit_pins/pytorch.txt
+#       - .ci/scripts/**
+#   workflow_dispatch:
+
+# concurrency:
+#   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+#   cancel-in-progress: true
+
+# jobs:
+#   test-models-macos-cpu:
+#     name: test-models-macos-cpu
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       matrix:
+#         # Mac runners are expensive, limited, and unreliable.
+#         # Do some basic testing for macos jobs, and rely mostly on
+#         # test-models-linux-aarch64 job instead.
+#         model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
+#         backend: [xnnpack-quantization-delegation]
+#         include:
+#           - model: efficient_sam
+#             backend: portable
+#           - model: llama
+#             backend: portable
+#           - model: llama3_2_vision_encoder
+#             backend: portable
+#           - model: mv3
+#             backend: portable
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         MODEL_NAME=${{ matrix.model }}
+#         BUILD_TOOL=cmake
+#         BACKEND=${{ matrix.backend }}
+
+#         bash .ci/scripts/setup-conda.sh
+#         # Setup MacOS dependencies as there is no Docker support on MacOS atm
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test executorch
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
+
+# #  test-models-arm-zephyr:
+# #    name: test-models-arm-zephyr
+# #    uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
+# #    strategy:
+# #      matrix:
+# #        model: [add, softmax, mv2]
+# #      fail-fast: false
+# #    with:
+# #      runner: linux.2xlarge
+# #      docker-image: ci-image:executorch-ubuntu-22.04-zephyr-sdk
+# #      submodules: 'recursive'
+# #      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+# #      timeout: 120
+# #      script: |
+# #        MODEL_NAME=${{ matrix.model }}
+# #        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+# #        conda activate "${CONDA_ENV}"
+# #        if [[ ${{ matrix.model}} == "add" ]]; then
+# #          SIM_LIMIT_SEC=60
+# #        elif [[ ${{ matrix.model}} == "softmax" ]]; then
+# #          SIM_LIMIT_SEC=60
+# #        elif [[ ${{ matrix.model}} == "mv2" ]]; then
+# #          SIM_LIMIT_SEC=5000
+# #        else
+# #          echo "Failed unsupported model selection ${{ matrix.model }}"
+# #          exit 1
+# #        fi
+# #
+# #        source .ci/scripts/utils.sh
+# #        source .ci/scripts/zephyr-utils.sh
+# #        mkdir -p zephyr_scratch/
+# #        cd zephyr_scratch
+# #        export ZEPHYR_PROJ_ROOT=$(realpath $(pwd))
+# #        export ARM_FVP_TUTORIALS_ROOT=$ZEPHYR_PROJ_ROOT/zephyr/samples/modules/executorch/arm-fvp-tutorials
+# #
+# #        # TODO @Bujji: Should see if this can be moved into the docker image itself
+# #        download_arm_zephyr_sdk
+# #        ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
+# #        cd $ZEPHYR_PROJ_ROOT
+# #        setup_zephyr_et_module
+# #
+# #        # Run setup scripts for Arm FVP and Arm AOT Compilation
+# #        cd $ZEPHYR_PROJ_ROOT/modules/lib/executorch
+# #        install_executorch
+# #        .ci/scripts/setup-arm-baremetal-tools.sh --target-toolchain zephyr
+# #        source examples/arm/ethos-u-scratch/setup_path.sh
+# #        source $ZEPHYR_PROJ_ROOT/zephyr/zephyr-env.sh
+# #
+# #        # Get the model as PTE
+# #        python -m examples.arm.aot_arm_compiler \
+# #            --model_name="${MODEL_NAME}" \
+# #            --output="${MODEL_NAME}.pte"
+# #
+# #        # Generate the C-style header
+# #        cd $ARM_FVP_TUTORIALS_ROOT
+# #        python build_model.py \
+# #            --executorch-root $ZEPHYR_PROJ_ROOT/modules/lib/executorch \
+# #            --pte-file $ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte \
+# #            --output-path $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/src/
+# #
+# #        cd $ARM_FVP_TUTORIALS_ROOT/models/${MODEL_NAME}/
+# #
+# #        # Build the zephyr elf
+# #        west build -p always -b mps3/corstone300/fvp -- \
+# #            -DET_PTE_FILE_PATH_FOR_SELECTIVE_BUILD=$ZEPHYR_PROJ_ROOT/modules/lib/executorch/${MODEL_NAME}.pte
+# #
+# #        # Run the simulation
+# #        FVP_Corstone_SSE-300_Ethos-U55 -a build/zephyr/zephyr.elf \
+# #            -C mps3_board.visualisation.disable-visualisation=1 \
+# #            -C mps3_board.telnetterminal0.start_telnet=0 \
+# #            -C mps3_board.uart0.out_file='sim.out'  \
+# #            -C cpu0.CFGITCMSZ=15 \
+# #            -C cpu0.CFGDTCMSZ=15 \
+# #            --simlimit ${SIM_LIMIT_SEC}
+# #
+# #        # Disable exit on error
+# #        set +e
+# #        # Report failure if any of the output verification checks fail
+# #        grep -qF "ERROR" sim.out
+# #        exit_status=$? #store 0 if found (failure), 1 if not (success)
+# #        if [[ "$exit_status" -eq "0" ]]; then
+# #            cat sim.out
+# #            set -e
+# #            exit 1
+# #        fi
+# #
+# #        # Report fail if simulation does not complete successfully
+# #        grep -qF "SUCCESS: Program complete, exiting." sim.out
+# #        exit_status=$? #store 0 if found (success), 1 if not (failure)
+# #        if [[ "$exit_status" -eq "1" ]]; then
+# #            cat sim.out
+# #            set -e
+# #            exit 1
+# #        fi
+# #        # Re-enable exit on error
+# #        set -e
+
+#   test-models-linux-aarch64:
+#     name: test-models-linux-aarch64
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
+#         backend: [portable, xnnpack-quantization-delegation]
+#         runner: [linux.arm64.2xlarge]
+#         include:
+#           - model: lstm
+#             backend: portable
+#             runner: linux.arm64.2xlarge
+#           - model: mul
+#             backend: portable
+#             runner: linux.arm64.2xlarge
+#           - model: softmax
+#             backend: portable
+#             runner: linux.arm64.2xlarge
+#           - model: phi_4_mini
+#             backend: portable
+#             runner: linux.arm64.m7g.4xlarge
+#           - model: qwen2_5_1_5b
+#             backend: portable
+#             runner: linux.arm64.2xlarge
+#           - model: llama3_2_vision_encoder
+#             backend: portable
+#             runner: linux.arm64.2xlarge
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:executorch-ubuntu-22.04-gcc11-aarch64
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         MODEL_NAME=${{ matrix.model }}
+#         BUILD_TOOL="cmake"
+#         BACKEND=${{ matrix.backend }}
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test ExecuTorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
+
+#   test-custom-ops-macos:
+#     name: test-custom-ops-macos
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       matrix:
+#         include:
+#           - build-tool: cmake
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       script: |
+#         BUILD_TOOL=${{ matrix.build-tool }}
+
+#         bash .ci/scripts/setup-conda.sh
+#         # Setup MacOS dependencies as there is no Docker support on MacOS atm
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test custom ops
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}"
+
+#   test-selective-build-macos:
+#     name: test-selective-build-macos
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       matrix:
+#         include:
+#           - build-tool: cmake
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       script: |
+#         BUILD_TOOL=${{ matrix.build-tool }}
+
+#         bash .ci/scripts/setup-conda.sh
+#         # Setup MacOS dependencies as there is no Docker support on MacOS atm
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test selective build
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
+
+#   test-demo-backend-delegation:
+#     name: test-demo-backend-delegation
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         include:
+#           - build-tool: buck2
+#           - build-tool: cmake
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL=${{ matrix.build-tool }}
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Test demo backend delegation
+#         PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}"
+
+#   test-arm-backend:
+#     name: test-arm-backend
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         include:
+#           - test_arm_baremetal: test_pytest_ops_ethosu_fvp
+#           - test_arm_baremetal: test_pytest_models_ethosu_fvp
+#           - test_arm_baremetal: test_run_ethosu_fvp
+#           - test_arm_baremetal: test_models_tosa
+#           - test_arm_baremetal: test_models_ethos-u55
+#           - test_arm_baremetal: test_models_ethos-u85
+#           - test_arm_baremetal: test_smaller_stories_llama
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge.memory
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 120
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+
+#         .ci/scripts/setup-arm-baremetal-tools.sh
+
+#         # Increase number of files user can monitor to bypass buck failures.
+#         # Hopefully this is high enough for this setup.
+#         sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024
+
+#         ARM_TEST=${{ matrix.test_arm_baremetal }}
+
+#         # Test test_arm_baremetal.sh with test
+#         backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}"
+
+#   test-arm-cortex-m-size-test:
+#     name: test-arm-cortex-m-size-test
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         os: [bare_metal, zephyr-preset]
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
+#         setup_script_args=""
+#         if [[ ${{ matrix.os}} == "bare_metal" ]]; then
+#           toolchain_prefix=arm-none-eabi-
+#           threshold="110592" # 108 KiB
+#           toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
+#         elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
+#           setup_script_args="--target-toolchain zephyr"
+#           toolchain_prefix=arm-zephyr-eabi-
+#           threshold="135168" # 132 KiB
+#           toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
+#         else
+#           echo "Fail unsupport OS selection ${{ matrix.os }}"
+#           exit 1
+#         fi
+
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+#         .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args}
+#         source examples/arm/ethos-u-scratch/setup_path.sh
+
+#         # User toolchain
+#         ${toolchain_prefix}c++ --version
+
+#         # Setup cmake target to desired toolchain
+#         toolchain_cmake=$(realpath ${toolchain_cmake})
+
+#         # Build and run size test
+#         if [[ ${{ matrix.os}} == "bare_metal" ]]; then
+#           bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
+#         elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
+#           CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
+#           cmake --build cmake-out -j9 --target install --config Release
+#           CXXFLAGS=${cxx_flags}  cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
+#           cmake --build cmake-out/test -j9 --config Release
+#         else
+#           echo "Fail unsupport OS selection ${{ matrix.os }}"
+#           exit 1
+#         fi
+
+#         elf="cmake-out/test/size_test"
+
+#         # Dump basic info
+#         ls -al ${elf}
+#         ${toolchain_prefix}size ${elf}
+
+#         # Dump symbol
+#         python .github/scripts/run_nm.py -e ${elf}
+#         python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}"
+#         python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}"
+
+#         # Add basic guard - TODO: refine this!
+#         ${toolchain_prefix}strip ${elf}
+#         output=$(ls -la ${elf})
+#         arr=($output)
+#         size=${arr[4]}
+#         echo "size: $size, threshold: $threshold"
+#         if [[ "$size" -le "$threshold" ]]; then
+#           echo "Success $size <= $threshold"
+#         else
+#           echo "Fail $size > $threshold"
+#           exit 1
+#         fi
+
+#   test-arm-ootb-linux:
+#     name: test-arm-ootb-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Follow the steps required before running the notebooks
+#         # Try to mirror these as closely as possible
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+
+#         .ci/scripts/setup-arm-baremetal-tools.sh
+#         source examples/arm/ethos-u-scratch/setup_path.sh
+
+#         # Install requirements for converting notebooks
+#         pip install notebook
+
+#         # Run OOTB tests
+#         backends/arm/test/test_arm_ootb.sh
+
+#   test-coreml-delegate:
+#     name: test-coreml-delegate
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     with:
+#       runner: macos-14-xlarge
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         BUILD_TOOL=cmake
+
+#         bash .ci/scripts/setup-conda.sh
+#         # Setup MacOS dependencies as there is no Docker support on MacOS atm
+#         GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+#         # Build and test coreml delegate
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh
+
+#   test-static-llama-ane:
+#     name: test-static-llama-ane
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       script: |
+#         set -eux
+#         bash .ci/scripts/setup-conda.sh
+#         eval "$(conda shell.bash hook)"
+
+#         # Install requirements
+#         ${CONDA_RUN} sh install_requirements.sh
+#         ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh
+#         ${CONDA_RUN} python install_executorch.py
+#         ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
+
+#         # Test ANE llama
+#         ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh
+
+#   test-llama-torchao-lowbit:
+#     name: test-llama-torchao-lowbit
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       script: |
+#         set -eux
+#         bash .ci/scripts/setup-conda.sh
+#         eval "$(conda shell.bash hook)"
+
+#         # Install requirements
+#         ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
+#         ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
+
+#         # Run test
+#         ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh
+
+#   test-llama-runner-linux:
+#     # Test Both linux x86 and linux aarch64
+#     name: test-llama-runner-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         mode: [portable, xnnpack+custom]
+#         runner: [linux.2xlarge, linux.arm64.2xlarge]
+#         docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64]
+#         include:
+#           - dtype: bf16
+#             mode: portable
+#             runner: linux.2xlarge
+#             docker-image: executorch-ubuntu-22.04-clang12
+#           - dtype: bf16
+#             mode: portable
+#             runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#           - dtype: bf16
+#             mode: custom
+#             runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#         # Excluding specific runner + docker image combinations that don't make sense:
+#         #   - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge)
+#         #   - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge)
+#         exclude:
+#           - runner: linux.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#           - runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-clang12
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:${{ matrix.docker-image }}
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         DTYPE=${{ matrix.dtype }}
+#         BUILD_TOOL="cmake"
+#         MODE=${{ matrix.mode }}
+#         ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}"
+#         ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}"
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Install requirements for export_llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+#         # Test llama2
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}"
+
+#   test-llama-runner-macos:
+#     name: test-llama-runner-mac
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         mode: [mps, coreml, xnnpack+custom+quantize_kv]
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+
+#         DTYPE=${{ matrix.dtype }}
+#         MODE=${{ matrix.mode }}
+
+#         bash .ci/scripts/setup-conda.sh
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake
+
+#         if [[ "${MODE}" == "coreml" ]]; then
+#           # Install coreml delegate
+#           PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
+#           echo "Finishing installing coreml."
+#         fi
+
+#         # Install requirements for export_llama
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh
+#         # Test llama2
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}"
+
+#   test-torchao-huggingface-checkpoints:
+#     name: test-torchao-huggingface-checkpoints
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         model: [qwen3_4b, phi_4_mini]
+#         runner: [linux.2xlarge]
+#         docker-image: [executorch-ubuntu-22.04-clang12]
+#         backend: [xnnpack]
+#         include:
+#           - model: qwen3_4b
+#             runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#             backend: torchao
+#           - model: phi_4_mini
+#             runner: linux.arm64.2xlarge
+#             docker-image: executorch-ubuntu-22.04-gcc11-aarch64
+#             backend: torchao
+#       fail-fast: false
+#     with:
+#       runner: ${{ matrix.runner }}
+#       docker-image: ci-image:${{ matrix.docker-image }}
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+
+#         if [[ "${{ matrix.backend }}" == "torchao" ]]; then
+#           BUILD_TORCHAO_EXPERIMENTAL=1 TORCHAO_BUILD_CPU_AARCH64=1 TORCHAO_BUILD_KLEIDIAI=1 TORCHAO_ENABLE_ARM_NEON_DOT=1 TORCHAO_PARALLEL_BACKEND=OPENMP pip install third-party/ao
+#         fi
+
+#         pip install -U "huggingface_hub[cli]"
+
+#         bash .ci/scripts/test_torchao_huggingface_checkpoints.sh ${{ matrix.model }} ${{ matrix.model != 'phi_4_mini' && '--test_with_runner' || '' }}  ${{ matrix.backend == 'torchao' && '--use_torchao_kernels' || '' }}
+
+#   test-multimodal-macos:
+#     if: ${{ !github.event.pull_request.head.repo.fork }}
+#     name: test-multimodal-macos
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     secrets: inherit
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         model: ["gemma3-4b"] # llava gives segfault so not covering.
+#     with:
+#       secrets-env: EXECUTORCH_HF_TOKEN
+#       runner: macos-15-xlarge
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         echo "::group::Set up ExecuTorch"
+#         bash .ci/scripts/setup-conda.sh
+#         eval "$(conda shell.bash hook)"
+
+#         # Install requirements
+#         ${CONDA_RUN} python install_executorch.py
+#         echo "::endgroup::"
+
+#         echo "::group::Set up Huggingface"
+#         ${CONDA_RUN} pip install -U "huggingface_hub[cli]" accelerate
+#         ${CONDA_RUN} huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+#         OPTIMUM_ET_VERSION=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+#         ${CONDA_RUN} pip install git+https://github.com/huggingface/optimum-executorch.git@${OPTIMUM_ET_VERSION}
+#         ${CONDA_RUN} pip list
+#         echo "::endgroup::"
+
+#         echo "::group::Test ${{ matrix.model }}"
+#         ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${{ matrix.model }} --quantize --recipe xnnpack
+#         echo "::endgroup::"
+
+#   test-qnn-model:
+#     name: test-qnn-model
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l, conv_former]
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
+
+#   test-qnn-optimum-model:
+#     name: test-qnn-optimum-model
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         model: [cvt, dit, efficientnet, focalnet, mobilevit_v1, mobilevit_v2, pvt, swin, albert, bert, distilbert, roberta] # eurobert requires transformers >= 4.48.0, skip for now
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
+
+#   test-models-macos-coreml:
+#     name: test-models-macos-coreml
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       matrix:
+#         model: [dl3, edsr, efficient_sam, emformer_join, emformer_transcribe, ic3, ic4, mobilebert, mv2, mv3, resnet50, vit, w2l]
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         MODEL_NAME=${{ matrix.model }}
+#         BUILD_TOOL=cmake
+#         BACKEND="coreml-pybind"
+
+
+#         # Set model specific overrides
+#         if [[ "${MODEL_NAME}" == "mobilebert" ]]; then
+#           # See https://github.com/pytorch/executorch/issues/12907
+#           # mobilebert produces NaN output on FP16 and high MSE on FP32, so the runtime test is disabled for now
+#           BACKEND="coreml"
+#         fi
+
+#         if [[ "${MODEL_NAME}" == "efficient_sam" ]]; then
+#           # See https://github.com/pytorch/executorch/issues/12906
+#           # efficient_sam fails to run on CoreML
+#           BACKEND="coreml"
+#         fi
+
+#         bash .ci/scripts/setup-conda.sh
+
+#         # Set up macOS dependencies, as there is no Docker support on macOS at the moment
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh
+#         echo "Finishing installing coreml."
+
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
+
+#   test-models-macos-mps:
+#     name: test-models-macos-mps
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     strategy:
+#       fail-fast: false
+#     with:
+#       runner: macos-m1-stable
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         BUILD_TOOL=cmake
+#         bash .ci/scripts/setup-conda.sh
+
+#         # Set up macOS dependencies, as there is no Docker support on macOS at the moment
+#         PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
+
+#         # Build and test mps model
+#         for MODEL_NAME in mv3 ic4 resnet50 edsr mobilebert w2l; do
+#           echo "::group::Exporting mps model: $MODEL_NAME"
+#           PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps"
+#           echo "::endgroup::"
+#         done
+
+#   test-huggingface-transformers-xnnpack:
+#     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+#     if: ${{ !github.event.pull_request.head.repo.fork }}
+#     name: test-huggingface-transformers-xnnpack
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     secrets: inherit
+#     strategy:
+#       matrix:
+#         config: [
+#           # XNNPack.
+#           llama3.2-1b|xnnpack|--quantize,
+#           qwen3-0.6b|xnnpack|--quantize,
+#           qwen3-1.7b|xnnpack|--quantize,
+#           gemma3-1b|xnnpack|--quantize,
+#           phi4-mini|xnnpack|--quantize,
+#           smollm2-135m|xnnpack|--quantize,
+#           smollm3-3b|xnnpack|--quantize
+#         ]
+#       fail-fast: false
+#     with:
+#       secrets-env: EXECUTORCH_HF_TOKEN
+#       runner: linux.2xlarge.memory
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       upload-artifact: profiling-artifacts-${{ strategy.job-index }}
+#       script: |
+#         set -eux
+#         IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
+#         echo "Model: $MODEL"
+#         echo "Recipe: $RECIPE"
+#         echo "Quantize: $QUANTIZE"
+
+#         echo "::group::Set up ExecuTorch"
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+#         # Build executor_runner with ETdump enabled
+#         PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \
+#           -DCMAKE_INSTALL_PREFIX=cmake-out \
+#           -DEXECUTORCH_ENABLE_LOGGING=1 \
+#           -DCMAKE_BUILD_TYPE=Release \
+#           -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
+#           -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+#           -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
+#           -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
+#           -DEXECUTORCH_BUILD_XNNPACK=ON \
+#           -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
+#           -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+#           -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
+#           -DEXECUTORCH_BUILD_DEVTOOLS=ON \
+#           -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
+#           -Bcmake-out .
+#         cmake --build cmake-out -j16 --target install --config Release
+#         echo "::endgroup::"
+
+#         echo "::group::Set up Hugging Face"
+#         pip install -U "huggingface_hub[cli]"
+#         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+#         OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+#         git clone https://github.com/huggingface/optimum-executorch
+#         pushd optimum-executorch
+#         # There is no release yet; for CI stability, always test from the same pinned commit on main
+#         git checkout $OPTIMUM_ET_COMMIT
+#         python install_dev.py --skip_override_torch
+#         popd
+#         pip list
+#         echo "::endgroup::"
+
+#         echo "::group::Run tests"
+#         export OUTPUT_DIR="$(pwd)/${MODEL}_${RECIPE}_${QUANTIZE}"
+#         python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE} --model_dir ${OUTPUT_DIR}
+#         echo "::endgroup::"
+
+#         echo "::group::Generate artifacts for performance profiling"
+#         ./cmake-out/executor_runner \
+#           --model_path ${OUTPUT_DIR}/model.pte \
+#           --etdump_path ${OUTPUT_DIR}/etdump.etdp
+
+#         export TSV_PATH=artifacts-to-be-uploaded/${MODEL}_op_prof.tsv
+#         mkdir -p $(dirname "$TSV_PATH")
+#         python3 -m devtools.inspector.inspector_cli \
+#           --etdump_path ${OUTPUT_DIR}/etdump.etdp \
+#           --tsv_path ${TSV_PATH}
+#         echo "::endgroup::"
+
+#   test-huggingface-transformers-macos:
+#     # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway
+#     if: ${{ !github.event.pull_request.head.repo.fork }}
+#     name: test-huggingface-transformers-macos
+#     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     secrets: inherit
+#     # Models below selected based on https://huggingface.co/models?pipeline_tag=text-generation&num_parameters=min:0,max:3B&sort=trending.
+#     strategy:
+#       matrix:
+#         config: [
+#           # # XNNPack. (Skipping for now due to intermittent segmentation faults, see https://github.com/huggingface/optimum-executorch/issues/122.)
+#           # llama3.2-1b|xnnpack|--quantize,
+#           # qwen3-0.6b|xnnpack|--quantize,
+#           # qwen3-1.7b|xnnpack|--quantize,
+#           # gemma3-1b|xnnpack|--quantize,
+#           # phi4-mini|xnnpack|--quantize,
+#           # smollm2-135m|xnnpack|--quantize,
+#           # smollm3-3b|xnnpack|--quantize,
+#           # qwen3-1.7b|xnnpack|--quantize,
+#           # CoreML.
+#           llama3.2-1b|coreml_fp32_gpu|--quantize,
+#           qwen3-0.6b|coreml_fp32_gpu|--quantize,
+#           smollm2-135m|coreml_fp32_gpu|--quantize,
+#           olmo-1b|coreml_fp32_gpu|--quantize,
+#           bert|coreml_fp32_gpu|--quantize,
+#           distilbert|coreml_fp32_gpu|--quantize
+#         ]
+#       fail-fast: false
+#     with:
+#       secrets-env: EXECUTORCH_HF_TOKEN
+#       runner: macos-15-xlarge
+#       python-version: '3.11'
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         set -eux
+#         IFS='|' read -r MODEL RECIPE QUANTIZE <<< "${{ matrix.config }}"
+#         echo "Model: $MODEL"
+#         echo "Recipe: $RECIPE"
+#         echo "Quantize: $QUANTIZE"
+
+#         echo "::group::Set up ExecuTorch"
+#         bash .ci/scripts/setup-conda.sh
+#         eval "$(conda shell.bash hook)"
+
+#         # Install requirements
+#         ${CONDA_RUN} python install_executorch.py
+#         echo "::endgroup::"
+
+#         echo "::group::Set up Hugging Face"
+#         pip install -U "huggingface_hub[cli]"
+#         huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN
+#         OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt)
+#         git clone https://github.com/huggingface/optimum-executorch
+#         pushd optimum-executorch
+#         # There is no release yet; for CI stability, always test from the same pinned commit on main
+#         git checkout $OPTIMUM_ET_COMMIT
+#         ${CONDA_RUN} python install_dev.py --skip_override_torch
+#         popd
+#         ${CONDA_RUN} pip list
+#         echo "::endgroup::"
+
+#         # Run test
+#         ${CONDA_RUN} python .ci/scripts/test_huggingface_optimum_model.py --model ${MODEL} --recipe ${RECIPE} ${QUANTIZE}
+
+#   test-llama-runner-qnn-linux:
+#     name: test-llama-runner-qnn-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       matrix:
+#         dtype: [fp32]
+#         pt2e_quantize: [qnn_16a16w, qnn_8a8w]
+#         mode: [qnn]
+#       fail-fast: false
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 900
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         BUILD_TOOL="cmake"
+#         DTYPE=${{ matrix.dtype }}
+#         MODE=${{ matrix.mode }}
+#         PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
+
+#         ./install_requirements.sh --use-pt-pinned-commit
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Install requirements for export_llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+#         # Test llama2
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
+
+#   # Detects QNN-related changes so that QNN jobs are only triggered when files matching the filter paths are changed
+#   changes:
+#     runs-on: ubuntu-latest
+#     outputs:
+#       qnn: ${{ steps.filter.outputs.qnn }}
+#     steps:
+#       - uses: actions/checkout@v4
+#       - uses: dorny/paths-filter@v3
+#         id: filter
+#         with:
+#           filters: |
+#             qnn:
+#               - 'backends/qualcomm/**'
+#               - 'examples/qualcomm/**'
+#               - 'examples/models/llama/**'
+
+#   test-static-llama-qnn-eval-linux:
+#     needs: changes # depends on the `changes` job defined above
+#     if: needs.changes.outputs.qnn == 'true'
+#     name: test-static-llama-qnn-eval-linux
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     permissions:
+#       id-token: write
+#       contents: read
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         config:
+#           - name: "baseline"
+#             flags: ""
+#             threshold: 62.0
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 180
+#       script: |
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+#         BUILD_TOOL="cmake"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+#         # Setup executorch
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}"
+#         # Setup install_requirements for llama
+#         PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
+
+#         echo ">>> Running config: ${{ matrix.config.name }}"
+#         PYTHON_EXECUTABLE=python bash .ci/scripts/test_qnn_static_llama_eval.sh \
+#           --flags "${{ matrix.config.flags }}" \
+#           --threshold "${{ matrix.config.threshold }}"
+
+#   unittest-release:
+#     uses: ./.github/workflows/_unittest.yml
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       build-mode: Release
+#       build-tool: cmake
+#       docker-image: ci-image:executorch-ubuntu-22.04-clang12
+
+#   test-mcu-models:
+#     name: test-mcu-models
+#     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+#     strategy:
+#       matrix:
+#         include:
+#           - build-tool: cmake
+#       fail-fast: false
+#     permissions:
+#       id-token: write
+#       contents: read
+#     with:
+#       runner: linux.2xlarge
+#       docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 90
+#       script: |
+#         BUILD_TOOL=${{ matrix.build-tool }}
+
+#         # The generic Linux job chooses to use base env, not the one setup by the image
+#         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+#         conda activate "${CONDA_ENV}"
+
+#         # Try to mirror these as closely as possible
+#         source .ci/scripts/utils.sh
+#         install_executorch "--use-pt-pinned-commit"
+
+#         .ci/scripts/setup-arm-baremetal-tools.sh
+#         source examples/arm/ethos-u-scratch/setup_path.sh
+
+#         # Run selective Build
+#         chmod +x examples/selective_build/test_selective_build.sh
+#         examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
+
+#         # Run MCU models
+#         chmod +x examples/arm/run_mcu_models_fvp.sh
+#         examples/arm/run_mcu_models_fvp.sh --target=cortex-m55
+
+#   test-models-windows:
+#     uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+#     strategy:
+#       fail-fast: false
+#       matrix:
+#         model: [mv3, resnet50, vit, mobilebert, emformer_transcribe]
+#         backend: [portable, xnnpack-q8]
+#     with:
+#       submodules: 'recursive'
+#       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+#       timeout: 60
+#       script: |
+#         conda init powershell
+
+#         powershell -Command "& {
+#           Set-PSDebug -Trace 1
+#           \$ErrorActionPreference = 'Stop'
+#           \$PSNativeCommandUseErrorActionPreference = \$true
+
+#           .ci/scripts/setup-windows.ps1
+
+#           .ci/scripts/test_model.ps1 -modelName ${{ matrix.model }} -backend ${{ matrix.backend }}
+#         }"