diff --git a/.ci/scripts/export_optimum_et_coreml.sh b/.ci/scripts/export_optimum_et_coreml.sh
new file mode 100644
index 00000000000..7634ed65c86
--- /dev/null
+++ b/.ci/scripts/export_optimum_et_coreml.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+set -exu
+
+source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
+
+# Export a Hugging Face LLM to an ExecuTorch Core ML .pte via optimum-cli.
+# Usage: export_optimum_et_coreml.sh <model_name> <output_dir>
+MODEL_NAME="$1"
+OUTPUT_DIR="$2"
+MODEL_ID=""
+TASK=""
+RECIPE=""
+
+# Map the short CI model name to its Hugging Face model_id, task, and export recipe
+case "$MODEL_NAME" in
+  smollm)
+    MODEL_ID="HuggingFaceTB/SmolLM2-135M"
+    TASK="text-generation"
+    RECIPE="coreml_llm_4bit"
+    ;;
+  llama3)
+    MODEL_ID="NousResearch/Llama-3.2-1B"
+    TASK="text-generation"
+    RECIPE="coreml_llm_4bit"
+    ;;
+  *)
+    echo "Error: Unknown model name '$MODEL_NAME' (expected: smollm or llama3)" >&2
+    exit 1
+    ;;
+esac
+
+# Call the CLI tool with the resolved model_id
+echo "Exporting model: $MODEL_NAME (ID: $MODEL_ID, TASK: $TASK, RECIPE: $RECIPE)"
+optimum-cli export executorch \
+  --model "${MODEL_ID}" \
+  --task "${TASK}" \
+  --recipe "${RECIPE}" \
+  --output_dir "${OUTPUT_DIR}"
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
index 4baa4ec3b75..b3326cc644d 100644
--- a/.github/workflows/trunk.yml
+++ b/.github/workflows/trunk.yml
@@ -18,557 +18,590 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  test-models-macos:
-    name: test-models-macos
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
-    strategy:
-      matrix:
-        # Mac runners are expensive and limited, and non reliable.
-        # Do some basic testing for macos jobs, and rely mostly on
-        # test-models-linux-aarch64 job instead.
-        model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l]
-        backend: [xnnpack-quantization-delegation]
-        include:
-          - model: efficient_sam
-            backend: portable
-          - model: llama
-            backend: portable
-          - model: llama3_2_vision_encoder
-            backend: portable
-          - model: mv3
-            backend: portable
-      fail-fast: false
-    with:
-      runner: macos-m1-stable
-      python-version: '3.11'
-      submodules: 'recursive'
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      timeout: 90
-      script: |
-        MODEL_NAME=${{ matrix.model }}
-        BUILD_TOOL=cmake
-        BACKEND=${{ matrix.backend }}
-
-        bash .ci/scripts/setup-conda.sh
-        # Setup MacOS dependencies as there is no Docker support on MacOS atm
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}"
-        # Build and test executorch
-        PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}"
-
-  test-models-linux-aarch64:
-    name: test-models-linux-aarch64
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      matrix:
-        model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe]
-        backend: [portable, xnnpack-quantization-delegation]
-        runner: [linux.arm64.2xlarge]
-        include:
-          - model: lstm
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: mul
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: softmax
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: phi_4_mini
-            backend: portable
-            runner: linux.arm64.m7g.4xlarge
-          - model: qwen2_5
-            backend: portable
-            runner: linux.arm64.2xlarge
-          - model: llama3_2_vision_encoder
-            backend: portable
-            runner: 
linux.arm64.2xlarge - fail-fast: false - with: - runner: ${{ matrix.runner }} - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - MODEL_NAME=${{ matrix.model }} - BUILD_TOOL="cmake" - BACKEND=${{ matrix.backend }} - - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Build and test ExecuTorch - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" - - test-custom-ops-macos: - name: test-custom-ops-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} + # test-models-macos: + # name: test-models-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # # Mac runners are expensive and limited, and non reliable. + # # Do some basic testing for macos jobs, and rely mostly on + # # test-models-linux-aarch64 job instead. + # model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l] + # backend: [xnnpack-quantization-delegation] + # include: + # - model: efficient_sam + # backend: portable + # - model: llama + # backend: portable + # - model: llama3_2_vision_encoder + # backend: portable + # - model: mv3 + # backend: portable + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL=cmake + # BACKEND=${{ matrix.backend }} - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test custom ops - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" + + # test-models-linux-aarch64: + # name: test-models-linux-aarch64 + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe] + # backend: [portable, xnnpack-quantization-delegation] + # runner: [linux.arm64.2xlarge] + # include: + # - model: lstm + # backend: portable + # runner: linux.arm64.2xlarge + # - model: mul + # backend: portable + # runner: linux.arm64.2xlarge + # - model: softmax + # backend: portable + # runner: 
linux.arm64.2xlarge + # - model: phi_4_mini + # backend: portable + # runner: linux.arm64.m7g.4xlarge + # - model: qwen2_5 + # backend: portable + # runner: linux.arm64.2xlarge + # - model: llama3_2_vision_encoder + # backend: portable + # runner: linux.arm64.2xlarge + # fail-fast: false + # with: + # runner: ${{ matrix.runner }} + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - test-selective-build-macos: - name: test-selective-build-macos - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - include: - - build-tool: cmake - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - BUILD_TOOL=${{ matrix.build-tool }} + # MODEL_NAME=${{ matrix.model }} + # BUILD_TOOL="cmake" + # BACKEND=${{ matrix.backend }} - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test selective build - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" - - test-demo-backend-delegation: - name: test-demo-backend-delegation - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - include: - - build-tool: buck2 - - build-tool: cmake - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL=${{ matrix.build-tool }} - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Test selective build - PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" - - test-arm-backend: - name: test-arm-backend - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - include: - - test_arm_baremetal: test_pytest_ops_ethosu_fvp - - test_arm_baremetal: test_pytest_models_ethosu_fvp - - test_arm_baremetal: test_run_ethosu_fvp - - test_arm_baremetal: test_models_tosa - - test_arm_baremetal: test_models_ethos-u55 - - test_arm_baremetal: test_models_ethos-u85 - fail-fast: false - with: - runner: linux.2xlarge.memory - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool 
"${BUILD_TOOL}" + # # Build and test ExecuTorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" - source .ci/scripts/utils.sh - install_executorch "--use-pt-pinned-commit" + # test-custom-ops-macos: + # name: test-custom-ops-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} - .ci/scripts/setup-arm-baremetal-tools.sh + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test custom ops + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" - # Increase number of files user can monitor to bypass buck failures. - # Hopefully this is high enough for this setup. - sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 + # test-selective-build-macos: + # name: test-selective-build-macos + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # include: + # - build-tool: cmake + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # BUILD_TOOL=${{ matrix.build-tool }} - ARM_TEST=${{ matrix.test_arm_baremetal }} + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test selective build + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" + + # test-demo-backend-delegation: + # name: test-demo-backend-delegation + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # include: + # - build-tool: buck2 + # - build-tool: cmake + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL=${{ matrix.build-tool }} + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Test selective build + # PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" + + # test-arm-backend: + # name: test-arm-backend + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # include: + # - test_arm_baremetal: test_pytest_ops_ethosu_fvp + # - test_arm_baremetal: test_pytest_models_ethosu_fvp + # - test_arm_baremetal: test_run_ethosu_fvp + # - test_arm_baremetal: test_models_tosa + # 
- test_arm_baremetal: test_models_ethos-u55 + # - test_arm_baremetal: test_models_ethos-u85 + # fail-fast: false + # with: + # runner: linux.2xlarge.memory + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - # Test test_arm_baremetal.sh with test - backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}" + # source .ci/scripts/utils.sh + # install_executorch "--use-pt-pinned-commit" - test-arm-cortex-m-size-test: - name: test-arm-cortex-m-size-test - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - os: [bare_metal, zephyr-preset] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0" - setup_script_args="" - if [[ ${{ matrix.os}} == "bare_metal" ]]; then - toolchain_prefix=arm-none-eabi- - threshold="103268" # ~100KiB - toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake - elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then - setup_script_args="--target-toolchain zephyr" - toolchain_prefix=arm-zephyr-eabi- - threshold="133120" # should be ~125KB, set threshold to 130KB - toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake - else - echo "Fail unsupport OS selection ${{ matrix.os }}" - exit 1 - fi - - source .ci/scripts/utils.sh - install_executorch "--use-pt-pinned-commit" - .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args} - source examples/arm/ethos-u-scratch/setup_path.sh - - # User toolchain - ${toolchain_prefix}c++ --version - - # Setup cmake target to desired toolchain - toolchain_cmake=$(realpath ${toolchain_cmake}) - - # Build and run size test - if [[ ${{ matrix.os}} == "bare_metal" ]]; then - bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON" - elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then - CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out . - cmake --build cmake-out -j9 --target install --config Release - CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test - cmake --build cmake-out/test -j9 --config Release - else - echo "Fail unsupport OS selection ${{ matrix.os }}" - exit 1 - fi - - elf="cmake-out/test/size_test" - - # Dump basic info - ls -al ${elf} - ${toolchain_prefix}size ${elf} - - # Dump symbol - python .github/scripts/run_nm.py -e ${elf} - python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}" - python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}" - - # Add basic guard - TODO: refine this! 
- ${toolchain_prefix}strip ${elf} - output=$(ls -la ${elf}) - arr=($output) - size=${arr[4]} - echo "size: $size, threshold: $threshold" - if [[ "$size" -le "$threshold" ]]; then - echo "Success $size <= $threshold" - else - echo "Fail $size > $threshold" - exit 1 - fi - - nxp-build-test: - name: nxp-build-test - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-arm-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - # Build - cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out . - cmake --build cmake-out --target executorch_delegate_neutron --config Release - - # Build check for the neutron backend library - lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a" - if [ -f $lib_neutron ]; then - echo "Neutron backend library built." - else - echo "Neutron backend library not found!" - exit 1 - fi - - test-coreml-delegate: - name: test-coreml-delegate - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-latest-xlarge - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - BUILD_TOOL=cmake + # .ci/scripts/setup-arm-baremetal-tools.sh - bash .ci/scripts/setup-conda.sh - # Setup MacOS dependencies as there is no Docker support on MacOS atm - GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # Build and test coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh + # # Increase number of files user can monitor to bypass buck failures. + # # Hopefully this is high enough for this setup. 
+ # sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 - test-static-llama-ane: - name: test-static-llama-ane - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - bash .ci/scripts/setup-conda.sh - eval "$(conda shell.bash hook)" + # ARM_TEST=${{ matrix.test_arm_baremetal }} - # Install requirements - ${CONDA_RUN} sh install_requirements.sh - ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh - ${CONDA_RUN} python install_executorch.py - ${CONDA_RUN} sh examples/models/llama/install_requirements.sh + # # Test test_arm_baremetal.sh with test + # backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}" - # Test ANE llama - ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh + # test-arm-cortex-m-size-test: + # name: test-arm-cortex-m-size-test + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # os: [bare_metal, zephyr-preset] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0" + # setup_script_args="" + # if [[ ${{ matrix.os}} == "bare_metal" ]]; then + # toolchain_prefix=arm-none-eabi- + # threshold="103268" # ~100KiB + # toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake + # elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then + # setup_script_args="--target-toolchain zephyr" + # toolchain_prefix=arm-zephyr-eabi- + # threshold="133120" # should be ~125KB, set threshold to 130KB + # toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake + # else + # echo "Fail unsupport OS selection ${{ matrix.os }}" + # exit 1 + # fi + + # source .ci/scripts/utils.sh + # install_executorch "--use-pt-pinned-commit" + # .ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args} + # source examples/arm/ethos-u-scratch/setup_path.sh + + # # User toolchain + # ${toolchain_prefix}c++ --version + + # # Setup cmake target to desired toolchain + # toolchain_cmake=$(realpath ${toolchain_cmake}) + + # # Build and run size test + # if [[ ${{ matrix.os}} == "bare_metal" ]]; then + # bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON" + # elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then + # CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out . 
+ # cmake --build cmake-out -j9 --target install --config Release + # CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test + # cmake --build cmake-out/test -j9 --config Release + # else + # echo "Fail unsupport OS selection ${{ matrix.os }}" + # exit 1 + # fi + + # elf="cmake-out/test/size_test" + + # # Dump basic info + # ls -al ${elf} + # ${toolchain_prefix}size ${elf} + + # # Dump symbol + # python .github/scripts/run_nm.py -e ${elf} + # python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}" + # python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}" + + # # Add basic guard - TODO: refine this! + # ${toolchain_prefix}strip ${elf} + # output=$(ls -la ${elf}) + # arr=($output) + # size=${arr[4]} + # echo "size: $size, threshold: $threshold" + # if [[ "$size" -le "$threshold" ]]; then + # echo "Success $size <= $threshold" + # else + # echo "Fail $size > $threshold" + # exit 1 + # fi + + # nxp-build-test: + # name: nxp-build-test + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-arm-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # # Build + # cmake -DEXECUTORCH_BUILD_NXP_NEUTRON=ON -Bcmake-out . + # cmake --build cmake-out --target executorch_delegate_neutron --config Release + + # # Build check for the neutron backend library + # lib_neutron="cmake-out/backends/nxp/libexecutorch_delegate_neutron.a" + # if [ -f $lib_neutron ]; then + # echo "Neutron backend library built." + # else + # echo "Neutron backend library not found!" 
+ # exit 1 + # fi + + # test-coreml-delegate: + # name: test-coreml-delegate + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-latest-xlarge + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # BUILD_TOOL=cmake - test-llama-torchao-lowbit: - name: test-llama-torchao-lowbit - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - script: | - set -eux - bash .ci/scripts/setup-conda.sh - eval "$(conda shell.bash hook)" + # bash .ci/scripts/setup-conda.sh + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # # Build and test coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh - # Install requirements - ${CONDA_RUN} python install_executorch.py - ${CONDA_RUN} sh examples/models/llama/install_requirements.sh - - # Run test - ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh - - test-llama-runner-linux: - # Test Both linux x86 and linux aarch64 - name: test-llama-runner-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - mode: [portable, xnnpack+custom] - runner: [linux.2xlarge, linux.arm64.2xlarge] - docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] - include: - - dtype: bf16 - mode: portable - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - - dtype: bf16 - mode: portable - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - dtype: bf16 - mode: custom - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - # Excluding specific runner + docker image combinations that don't make sense: - # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) - # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) - exclude: - - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - - runner: linux.arm64.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - fail-fast: false - with: - runner: ${{ matrix.runner }} - docker-image: ${{ matrix.docker-image }} - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - DTYPE=${{ matrix.dtype }} - BUILD_TOOL="cmake" - MODE=${{ matrix.mode }} - ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" - ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" 
-upload "${ARTIFACTS_DIR_NAME}" - - test-llama-runner-macos: - name: test-llama-runner-mac - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main - strategy: - matrix: - dtype: [fp32] - mode: [mps, coreml, xnnpack+custom+quantize_kv] - fail-fast: false - with: - runner: macos-m1-stable - python-version: '3.11' - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | + # test-static-llama-ane: + # name: test-static-llama-ane + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # set -eux + # bash .ci/scripts/setup-conda.sh + # eval "$(conda shell.bash hook)" - DTYPE=${{ matrix.dtype }} - MODE=${{ matrix.mode }} + # # Install requirements + # ${CONDA_RUN} sh install_requirements.sh + # ${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh + # ${CONDA_RUN} python install_executorch.py + # ${CONDA_RUN} sh examples/models/llama/install_requirements.sh - bash .ci/scripts/setup-conda.sh + # # Test ANE llama + # ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh - # Setup executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake + # test-llama-torchao-lowbit: + # name: test-llama-torchao-lowbit + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # script: | + # set -eux + # bash .ci/scripts/setup-conda.sh + # eval "$(conda shell.bash hook)" + + # # Install requirements + # ${CONDA_RUN} python install_executorch.py + # ${CONDA_RUN} sh examples/models/llama/install_requirements.sh + + # # Run test + # ${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh + + # test-llama-runner-linux: + # # Test Both linux x86 and linux aarch64 + # name: test-llama-runner-linux + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # mode: [portable, xnnpack+custom] + # runner: [linux.2xlarge, linux.arm64.2xlarge] + # docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] + # include: + # - dtype: bf16 + # mode: portable + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # - dtype: bf16 + # mode: portable + # runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # - dtype: bf16 + # mode: custom + # runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # # Excluding specific runner + docker image combinations that don't make sense: + # # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + # exclude: + # - runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # - runner: linux.arm64.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # fail-fast: false + # with: + # runner: ${{ matrix.runner }} + # docker-image: ${{ matrix.docker-image }} + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || 
github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # DTYPE=${{ matrix.dtype }} + # BUILD_TOOL="cmake" + # MODE=${{ matrix.mode }} + # ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" + # ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" + + # # Setup executorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + + # test-llama-runner-macos: + # name: test-llama-runner-mac + # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # strategy: + # matrix: + # dtype: [fp32] + # mode: [mps, coreml, xnnpack+custom+quantize_kv] + # fail-fast: false + # with: + # runner: macos-m1-stable + # python-version: '3.11' + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | - if [[ "${MODE}" == "coreml" ]]; then - # Install coreml delegate - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - fi + # DTYPE=${{ matrix.dtype }} + # MODE=${{ matrix.mode }} - # Install requirements for export_llama - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + # bash .ci/scripts/setup-conda.sh - # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. - # test-llava-runner-macos: - # name: test-llava-runner-macos + # # Setup executorch + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake + + # if [[ "${MODE}" == "coreml" ]]; then + # # Install coreml delegate + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." + # fi + + # # Install requirements for export_llama + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" + + # # # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. 
+ # # test-llava-runner-macos: + # # name: test-llava-runner-macos + # # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + # # strategy: + # # fail-fast: false + # # with: + # # runner: macos-14-xlarge + # # python-version: '3.11' + # # submodules: 'recursive' + # # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # # timeout: 900 + # # script: | + # # BUILD_TOOL=cmake + + # # bash .ci/scripts/setup-conda.sh + # # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + + # # # install Llava requirements + # # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh + # # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh + + # # # run python unittest + # # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava + + # # # run e2e (export, tokenizer and runner) + # # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh + + # test-qnn-model: + # name: test-qnn-model + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + # test-qnn-optimum-model: + # name: test-qnn-optimum-model + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" + + # test-apple-model: + # name: test-apple-model # uses: pytorch/test-infra/.github/workflows/macos_job.yml@main # strategy: # fail-fast: false # with: - # runner: macos-14-xlarge + # runner: macos-m1-stable # python-version: '3.11' # submodules: 'recursive' # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - # timeout: 
900 + # timeout: 90 # script: | # BUILD_TOOL=cmake # bash .ci/scripts/setup-conda.sh - # # Setup MacOS dependencies as there is no Docker support on MacOS atm - # GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - # # install Llava requirements - # ${CONDA_RUN} bash examples/models/llama/install_requirements.sh - # ${CONDA_RUN} bash examples/models/llava/install_requirements.sh - - # # run python unittest - # ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava - - # # run e2e (export, tokenizer and runner) - # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh - - test-qnn-model: - name: test-qnn-model - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - - test-qnn-optimum-model: - name: test-qnn-optimum-model - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - model: [albert, bert, distilbert] # eurobert requires transfomer >= 4.48.0, skip for now - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" - - test-apple-model: - name: test-apple-model + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh + # echo "Finishing installing coreml." 
+ + # # Build and test coreml model + # MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) + # for MODEL_NAME in "${MODELS[@]}"; do + # echo "::group::Exporting coreml model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" + # echo "::endgroup::" + + # echo "::group::Exporting mps model: $MODEL_NAME" + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" + # echo "::endgroup::" + # done + + test-coreml-optimum-executorch: + name: test-coreml-optimum-executorch uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: fail-fast: false @@ -579,224 +612,233 @@ jobs: ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 script: | - BUILD_TOOL=cmake - + set -eux bash .ci/scripts/setup-conda.sh + eval "$(conda shell.bash hook)" - # Setup MacOS dependencies as there is no Docker support on MacOS atm - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh - echo "Finishing installing coreml." - - # Build and test coreml model - MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) - for MODEL_NAME in "${MODELS[@]}"; do - echo "::group::Exporting coreml model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" - echo "::endgroup::" + # Install requirements + ${CONDA_RUN} python install_executorch.py - echo "::group::Exporting mps model: $MODEL_NAME" - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" - echo "::endgroup::" - done + # # Setup MacOS dependencies as there is no Docker support on MacOS atm + # PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" - test-huggingface-transformers: - # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway - if: ${{ !github.event.pull_request.head.repo.fork }} - name: test-huggingface-transformers - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - secrets: inherit - strategy: - matrix: - hf_model_id: [ - google/gemma-3-1b-it, - Qwen/Qwen3-0.6B, - HuggingFaceTB/SmolLM2-135M, - meta-llama/Llama-3.2-1B, - allenai/OLMo-1B-hf, - ] - fail-fast: false - with: - secrets-env: EXECUTORCH_HF_TOKEN - runner: linux.2xlarge.memory - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - upload-artifact: profiling-artifacts-${{ strategy.job-index }} - script: | - echo "::group::Set up ExecuTorch" - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake - # Build executor_runner with ETdump enabled - PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \ - -DCMAKE_INSTALL_PREFIX=cmake-out \ - -DEXECUTORCH_ENABLE_LOGGING=1 \ - -DCMAKE_BUILD_TYPE=Release \ - -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ - -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ - -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ - 
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ - -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ - -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ - -DEXECUTORCH_BUILD_DEVTOOLS=ON \ - -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ - -Bcmake-out . - cmake --build cmake-out -j16 --target install --config Release - echo "::endgroup::" + pip list echo "::group::Set up Hugging Face" pip install -U "huggingface_hub[cli]" - huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) git clone https://github.com/huggingface/optimum-executorch pushd optimum-executorch # There is no release yet, for CI stability, always test from the same commit on main - git checkout $OPTIMUM_ET_COMMIT - python install_dev.py --skip_override_torch - popd - pip list - echo "::endgroup::" - - echo "::group::Export to ExecuTorch" - # Pass matrix variable as environment variable - export MODEL_ID="${{ matrix.hf_model_id }}" - export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" - pushd optimum-executorch - - ARGS=( - "--model" "${MODEL_ID}" - "--task" "text-generation" - "--recipe" "xnnpack" - "--use_custom_sdpa" - "--use_custom_kv_cache" - "--qlinear" - "--qembedding" - "--output_dir" "${OUTPUT_DIR}" - ) - - optimum-cli export executorch "${ARGS[@]}" - - ls -FlAGhp ${OUTPUT_DIR} - popd - echo "::endgroup::" - - echo "::group::Inference using python API" - pushd optimum-executorch - python -c " - import os - from optimum.executorch import ExecuTorchModelForCausalLM - from transformers import AutoTokenizer - - model_id = os.getenv('MODEL_ID') - pte_dir = os.getenv('OUTPUT_DIR') - print(f'Loading model {model_id} from {pte_dir}.') - model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) - generated_text = model.text_generation( - tokenizer=AutoTokenizer.from_pretrained(model_id), - prompt='Simply put, the theory of relativity states that', - max_seq_len=64 - ) - print(generated_text) - " + # git checkout $OPTIMUM_ET_COMMIT + gh pr checkout 93 + ${CONDA_RUN} python install_dev.py --skip_override_torch popd + ${CONDA_RUN} pip list echo "::endgroup::" - echo "::group::Inference using executor_runner with ETDump" - ./cmake-out/executor_runner \ - --model_path ${OUTPUT_DIR}/model.pte \ - --etdump_path ${OUTPUT_DIR}/etdump.etdp - - export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv - mkdir -p $(dirname "$TSV_PATH") - python3 -m devtools.inspector.inspector_cli \ - --etdump_path ${OUTPUT_DIR}/etdump.etdp \ - --tsv_path ${TSV_PATH} - - echo "::endgroup::" - + MODELS=(smollm llama3) + for MODEL_NAME in "${MODELS[@]}"; do + ${CONDA_RUN} bash .ci/scripts/export_optimum_et_coreml.sh "${MODEL_NAME}" "/tmp/${MODEL_NAME}" + done - test-llama-runner-qnn-linux: - name: test-llama-runner-qnn-linux - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - strategy: - matrix: - dtype: [fp32] - pt2e_quantize: [qnn_16a16w, qnn_8a8w] - mode: [qnn] - fail-fast: false - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-qnn-sdk - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 900 - script: | - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" - - BUILD_TOOL="cmake" - DTYPE=${{ matrix.dtype }} - MODE=${{ 
matrix.mode }} - PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} - - ./install_requirements.sh --use-pt-pinned-commit - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh - PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh - - # Setup executorch - PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" - # Install requirements for export_llama - PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh - # Test llama2 - PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" - - unittest-release: - uses: ./.github/workflows/_unittest.yml - permissions: - id-token: write - contents: read - with: - build-mode: Release - build-tool: cmake - docker-image: executorch-ubuntu-22.04-clang12 - - unittest-nxp-neutron: - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main - permissions: - id-token: write - contents: read - with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 - submodules: 'recursive' - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: 90 - script: | - set -eux + # test-huggingface-transformers: + # # NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway + # if: ${{ !github.event.pull_request.head.repo.fork }} + # name: test-huggingface-transformers + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # secrets: inherit + # strategy: + # matrix: + # hf_model_id: [ + # google/gemma-3-1b-it, + # Qwen/Qwen3-0.6B, + # HuggingFaceTB/SmolLM2-135M, + # meta-llama/Llama-3.2-1B, + # allenai/OLMo-1B-hf, + # ] + # fail-fast: false + # with: + # secrets-env: EXECUTORCH_HF_TOKEN + # runner: linux.2xlarge.memory + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # upload-artifact: profiling-artifacts-${{ strategy.job-index }} + # script: | + # echo "::group::Set up ExecuTorch" + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake + # # Build executor_runner with ETdump enabled + # PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \ + # -DCMAKE_INSTALL_PREFIX=cmake-out \ + # -DEXECUTORCH_ENABLE_LOGGING=1 \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ + # -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ + # -DEXECUTORCH_BUILD_XNNPACK=ON \ + # -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ + # -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ + # -DEXECUTORCH_BUILD_DEVTOOLS=ON \ + # -DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ + # -Bcmake-out . 
+ # cmake --build cmake-out -j16 --target install --config Release + # echo "::endgroup::" + + # echo "::group::Set up Hugging Face" + # pip install -U "huggingface_hub[cli]" + # huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN + # OPTIMUM_ET_COMMIT=$(cat .ci/docker/ci_commit_pins/optimum-executorch.txt) + # git clone https://github.com/huggingface/optimum-executorch + # pushd optimum-executorch + # # There is no release yet, for CI stability, always test from the same commit on main + # git checkout $OPTIMUM_ET_COMMIT + # python install_dev.py --skip_override_torch + # popd + # pip list + # echo "::endgroup::" + + # echo "::group::Export to ExecuTorch" + # # Pass matrix variable as environment variable + # export MODEL_ID="${{ matrix.hf_model_id }}" + # export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" + # pushd optimum-executorch + + # ARGS=( + # "--model" "${MODEL_ID}" + # "--task" "text-generation" + # "--recipe" "xnnpack" + # "--use_custom_sdpa" + # "--use_custom_kv_cache" + # "--qlinear" + # "--qembedding" + # "--output_dir" "${OUTPUT_DIR}" + # ) + + # optimum-cli export executorch "${ARGS[@]}" + + # ls -FlAGhp ${OUTPUT_DIR} + # popd + # echo "::endgroup::" + + # echo "::group::Inference using python API" + # pushd optimum-executorch + # python -c " + # import os + # from optimum.executorch import ExecuTorchModelForCausalLM + # from transformers import AutoTokenizer + + # model_id = os.getenv('MODEL_ID') + # pte_dir = os.getenv('OUTPUT_DIR') + # print(f'Loading model {model_id} from {pte_dir}.') + # model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) + # generated_text = model.text_generation( + # tokenizer=AutoTokenizer.from_pretrained(model_id), + # prompt='Simply put, the theory of relativity states that', + # max_seq_len=64 + # ) + # print(generated_text) + # " + # popd + # echo "::endgroup::" + + # echo "::group::Inference using executor_runner with ETDump" + # ./cmake-out/executor_runner \ + # --model_path ${OUTPUT_DIR}/model.pte \ + # --etdump_path ${OUTPUT_DIR}/etdump.etdp + + # export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv + # mkdir -p $(dirname "$TSV_PATH") + # python3 -m devtools.inspector.inspector_cli \ + # --etdump_path ${OUTPUT_DIR}/etdump.etdp \ + # --tsv_path ${TSV_PATH} + + # echo "::endgroup::" + + + # test-llama-runner-qnn-linux: + # name: test-llama-runner-qnn-linux + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # strategy: + # matrix: + # dtype: [fp32] + # pt2e_quantize: [qnn_16a16w, qnn_8a8w] + # mode: [qnn] + # fail-fast: false + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-qnn-sdk + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 900 + # script: | + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" + + # BUILD_TOOL="cmake" + # DTYPE=${{ matrix.dtype }} + # MODE=${{ matrix.mode }} + # PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} + + # ./install_requirements.sh --use-pt-pinned-commit + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh + # PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh + + # # Setup executorch + # PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # # Install requirements for export_llama + # 
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # # Test llama2 + # PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" + + # unittest-release: + # uses: ./.github/workflows/_unittest.yml + # permissions: + # id-token: write + # contents: read + # with: + # build-mode: Release + # build-tool: cmake + # docker-image: executorch-ubuntu-22.04-clang12 + + # unittest-nxp-neutron: + # uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + # permissions: + # id-token: write + # contents: read + # with: + # runner: linux.2xlarge + # docker-image: executorch-ubuntu-22.04-clang12 + # submodules: 'recursive' + # ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + # timeout: 90 + # script: | + # set -eux - # The generic Linux job chooses to use base env, not the one setup by the image - CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") - conda activate "${CONDA_ENV}" + # # The generic Linux job chooses to use base env, not the one setup by the image + # CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + # conda activate "${CONDA_ENV}" - # Build and install Executorch - PYTHON_EXECUTABLE=python \ - CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \ - .ci/scripts/setup-linux.sh --build-tool "cmake" + # # Build and install Executorch + # PYTHON_EXECUTABLE=python \ + # CMAKE_ARGS="-DEXECUTORCH_BUILD_NXP_NEUTRON=ON" \ + # .ci/scripts/setup-linux.sh --build-tool "cmake" - # Install test requirements - pip install -r backends/nxp/requirements-tests.txt + # # Install test requirements + # pip install -r backends/nxp/requirements-tests.txt - # Run pytest - PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh + # # Run pytest + # PYTHON_EXECUTABLE=python bash backends/nxp/run_unittests.sh
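
Test plan: a minimal local sketch of the new export path, assuming a macOS checkout where the setup steps from test-coreml-optimum-executorch (install_executorch.py, then optimum-executorch from PR 93) have already been run; the model names and output locations below mirror the job's CI loop:

  for MODEL_NAME in smollm llama3; do
    bash .ci/scripts/export_optimum_et_coreml.sh "${MODEL_NAME}" "/tmp/${MODEL_NAME}"
    ls -FlAGhp "/tmp/${MODEL_NAME}"  # the exported Core ML .pte artifact(s) should land here
  done

The case statement in the export script keeps the short CI model names decoupled from the Hugging Face model IDs and recipes, so covering another model in CI only requires a new case arm plus an entry in the job's MODELS list.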