From 9b2cb874a5d0f2e694e5fbc1e47d24598cb1e2d2 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 14:49:16 -0400 Subject: [PATCH 01/19] Fix macos test-model jobs (#9235) Fixing https://github.com/pytorch/executorch/pull/9227: it wasn't running efficient_sam, llama, etc., because I had to define all variables in the include statement. Since build-tool and runner are singletons, I just hard-code them. Test Plan: Make sure trunk jobs have softmax, efficient_sam, etc. --- .github/workflows/trunk.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index dd73e7321ee..359b8c0f6d3 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -25,8 +25,6 @@ jobs: matrix: model: [add, add_mul, emformer_join, emformer_transcribe, ic3, ic4, linear, llama2, mobilebert, mv2, mv3, resnet18, resnet50, vit, w2l] backend: [portable, xnnpack-quantization-delegation] - build-tool: [cmake] - runner: [macos-m1-stable] include: - model: efficient_sam backend: portable @@ -46,14 +44,14 @@ jobs: backend: portable fail-fast: false with: - runner: ${{ matrix.runner }} + runner: macos-m1-stable python-version: '3.11' submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 script: | MODEL_NAME=${{ matrix.model }} - BUILD_TOOL=${{ matrix.build-tool }} + BUILD_TOOL=cmake BACKEND=${{ matrix.backend }} DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} From 08f0f7a1fc05f7cbeb124f9e326ed517776fcb1f Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 15:17:58 -0400 Subject: [PATCH 02/19] Another fix for CI job refactoring (#9237) Fixing https://github.com/pytorch/executorch/pull/9227: matrix.timeout is not populated, so just hard-code the value for now. Also apply another cleanup: DEMO_BACKEND_DELEGATION is not used anymore. Test Plan: Make sure there are 6 jobs (test-models-linux) in the pull.yml category.
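For context on both of these CI fixes, here is a minimal, illustrative sketch (not the actual trunk.yml or pull.yml) of the GitHub Actions behavior involved: any `${{ matrix.* }}` value expands to an empty string for a job combination that does not define it, and an `include` entry that overrides matrix keys becomes a standalone combination carrying only the keys it spells out.

```yaml
# Illustrative only, assuming a simplified matrix: the efficient_sam include
# entry overrides the matrix's model/backend values, so it becomes its own
# combination. Any axis it omits (runner, build-tool, timeout) is undefined
# there, and ${{ matrix.runner }} etc. expand to empty strings at runtime.
strategy:
  fail-fast: false
  matrix:
    model: [add, mv2]
    backend: [portable, xnnpack-quantization-delegation]
    include:
      - model: efficient_sam
        backend: portable   # no runner/build-tool here -> empty in the job
```

Hard-coding the runner, build tool, and timeout in the job body, as these two patches do, sidesteps that pitfall entirely.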
--- .github/workflows/pull.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index c066a7045aa..cc4b04f197a 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -56,7 +56,7 @@ jobs: docker-image: executorch-ubuntu-22.04-clang12 submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - timeout: ${{ matrix.timeout }} + timeout: 90 script: | # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") @@ -65,11 +65,10 @@ jobs: MODEL_NAME=${{ matrix.model }} BUILD_TOOL=${{ matrix.build-tool }} BACKEND=${{ matrix.backend }} - DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" # Build and test ExecuTorch - PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" + PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" test-llama-runner-linux: name: test-llama-runner-linux From 570e06c6088bb54e838c936afeeac21957e20175 Mon Sep 17 00:00:00 2001 From: jathu Date: Thu, 13 Mar 2025 13:29:42 -0700 Subject: [PATCH 03/19] Build flatc for the host instead of the target platform (#9077) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary * Fixes https://github.com/pytorch/executorch/issues/7260 * We use flatc as part of the build step to generate some files from FlatBuffer definitions. This implies the tool runs on the host. However, we currently build flatc as part of the main build — which propagates the target CXX flags to flatc. Thus, flatc gets built targeting the target platform. To ensure flatc is built for the host, we can use include flatc as an [ExternalProject](https://cmake.org/cmake/help/latest/module/ExternalProject.html). This does not propagate the CXX flags * Some targets implicitly depended on flatc, we now make that requirement explicit * We currently spread the `FLATC_EXECUTABLE` defaulting across the project. 
Let's just centralize this at the root ### Test plan ```bash $ ./install_executorch.sh # Previously flatc was built against the Android target, now they target the host $ ./build/build_android_library.sh $ file /Users/jathu/executorch/cmake-out-android-arm64-v8a/third-party/flatbuffers/flatc /Users/jathu/executorch/cmake-out-android-arm64-v8a/third-party/flatbuffers/flatc: Mach-O 64-bit executable arm64 # Apple builds work as usual, but they use flatc from pip $ ./build/build_apple_frameworks.sh ``` cc @larryliu0820 @lucylq @swolchok @dbort --- CMakeLists.txt | 79 ++++++++++++------- backends/apple/mps/CMakeLists.txt | 5 +- backends/qualcomm/CMakeLists.txt | 6 +- backends/vulkan/CMakeLists.txt | 5 +- backends/xnnpack/CMakeLists.txt | 5 +- devtools/CMakeLists.txt | 6 +- examples/apple/mps/CMakeLists.txt | 4 - .../flat_tensor/serialize/CMakeLists.txt | 8 +- schema/CMakeLists.txt | 8 +- setup.py | 4 - 10 files changed, 59 insertions(+), 71 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fabf667cbe1..54e2e1ebfbf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -474,6 +474,17 @@ cmake_dependent_option( "NOT FLATC_EXECUTABLE;EXECUTORCH_BUILD_HOST_TARGETS" OFF ) + +set(FLATBUFFERS_BUILD_FLATC OFF CACHE BOOL "") +set(FLATBUFFERS_BUILD_FLATHASH OFF CACHE BOOL "") +set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "") +set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "") +set(FLATBUFFERS_INSTALL OFF CACHE BOOL "") +# exir lets users set the alignment of tensor data embedded in the flatbuffer, +# and some users need an alignment larger than the default, which is typically +# 32. +set(FLATBUFFERS_MAX_ALIGNMENT 1024) + if(EXECUTORCH_BUILD_FLATC) if(FLATC_EXECUTABLE) # We could ignore this, but it could lead to confusion about which `flatc` @@ -482,41 +493,49 @@ if(EXECUTORCH_BUILD_FLATC) FATAL_ERROR "May not set both EXECUTORCH_BUILD_FLATC and FLATC_EXECUTABLE" ) endif() - set(FLATC_EXECUTABLE flatc) - set(FLATBUFFERS_BUILD_FLATC - ON - CACHE BOOL "" - ) - set(FLATBUFFERS_BUILD_FLATHASH - OFF - CACHE BOOL "" - ) - set(FLATBUFFERS_BUILD_FLATLIB - OFF - CACHE BOOL "" - ) - set(FLATBUFFERS_BUILD_TESTS - OFF - CACHE BOOL "" - ) - set(FLATBUFFERS_INSTALL - OFF - CACHE BOOL "" - ) - add_subdirectory(third-party/flatbuffers) - # exir lets users set the alignment of tensor data embedded in the flatbuffer, - # and some users need an alignment larger than the default, which is typically - # 32. - target_compile_definitions(flatc PRIVATE FLATBUFFERS_MAX_ALIGNMENT=1024) + # Build flatc for the *host* to generate files as part of the build step. + include(ExternalProject) + ExternalProject_Add( + flatbuffers + PREFIX ${CMAKE_CURRENT_BINARY_DIR}/third-party/flatbuffers + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/third-party/flatbuffers + SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/third-party/flatbuffers + CMAKE_ARGS -DFLATBUFFERS_BUILD_FLATC=ON + -DFLATBUFFERS_BUILD_FLATHASH=${FLATBUFFERS_BUILD_FLATHASH} + -DFLATBUFFERS_BUILD_FLATLIB=${FLATBUFFERS_BUILD_FLATLIB} + -DFLATBUFFERS_BUILD_TESTS=${FLATBUFFERS_BUILD_TESTS} + -DFLATBUFFERS_INSTALL=${FLATBUFFERS_INSTALL} + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_CXX_FLAGS="-DFLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT}" + INSTALL_COMMAND "" + ) + ExternalProject_Get_Property(flatbuffers BINARY_DIR) + set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc) + set(FLATC_EXECUTABLE_BUILT_FROM_SOURCE YES) endif() + if(NOT FLATC_EXECUTABLE) message( - FATAL_ERROR - "FLATC_EXECUTABLE must be set when EXECUTORCH_BUILD_FLATC is disabled. 
" - "Note that EXECUTORCH_BUILD_FLATC may be disabled implicitly when " - "cross-compiling or when EXECUTORCH_BUILD_HOST_TARGETS is disabled." + WARNING "FLATC_EXECUTABLE not specified, looking for flatc" ) + find_program(FLATC_EXECUTABLE flatc) + + if(NOT FLATC_EXECUTABLE) + message( + FATAL_ERROR + "FLATC_EXECUTABLE must be set when EXECUTORCH_BUILD_FLATC is disabled. " + "Note that EXECUTORCH_BUILD_FLATC may be disabled implicitly when " + "cross-compiling or when EXECUTORCH_BUILD_HOST_TARGETS is disabled." + ) + endif() +endif() + +add_executable(flatc IMPORTED GLOBAL) +set_target_properties(flatc PROPERTIES IMPORTED_LOCATION ${FLATC_EXECUTABLE}) + +if(FLATC_EXECUTABLE_BUILT_FROM_SOURCE) + add_dependencies(flatc flatbuffers) endif() # diff --git a/backends/apple/mps/CMakeLists.txt b/backends/apple/mps/CMakeLists.txt index 96aa007563b..4bd4077a0f6 100644 --- a/backends/apple/mps/CMakeLists.txt +++ b/backends/apple/mps/CMakeLists.txt @@ -22,10 +22,6 @@ if(NOT PYTHON_EXECUTABLE) resolve_python_executable() endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - set(_common_compile_options -Wno-deprecated-declarations) set(_common_include_directories ${EXECUTORCH_ROOT}/..) @@ -50,6 +46,7 @@ add_custom_command( "${_mps_schema__include_dir}/executorch/backends/apple/mps" ${_mps_schema__srcs} WORKING_DIRECTORY ${EXECUTORCH_ROOT} + DEPENDS flatc COMMENT "Generating mps_schema headers" VERBATIM ) diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index 560cac176b3..f5adc84f903 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -39,16 +39,13 @@ if(${ANDROID}) find_library(android_log log) endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - set(qcir_schema_include_dir ${CMAKE_CURRENT_LIST_DIR}/aot/ir) set(qcir_schema_output ${qcir_schema_include_dir}/qcir_generated.h) add_custom_command( OUTPUT qcir_schema_output COMMAND ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o ${qcir_schema_include_dir} ${qcir_schema_include_dir}/qcir.fbs + DEPENDS flatc COMMENT "Generating qualcomm ir schema headers" VERBATIM ) @@ -100,6 +97,7 @@ add_custom_command( "${_qnn_schema__include_dir}/executorch/backends/qualcomm" ${_qnn_schema__srcs} WORKING_DIRECTORY ${EXECUTORCH_SOURCE_DIR} + DEPENDS flatc COMMENT "Generating qnn_schema headers" VERBATIM ) diff --git a/backends/vulkan/CMakeLists.txt b/backends/vulkan/CMakeLists.txt index fca34fdf6a4..db90bdc7c29 100644 --- a/backends/vulkan/CMakeLists.txt +++ b/backends/vulkan/CMakeLists.txt @@ -28,10 +28,6 @@ if(NOT PYTHON_EXECUTABLE) set(PYTHON_EXECUTABLE python3) endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # Include this file to access target_link_options_shared_lib This is required to # provide access to target_link_options_shared_lib which allows libraries to be # linked with the --whole-archive flag. 
This is required for libraries that @@ -92,6 +88,7 @@ add_custom_command( ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --scoped-enums -o "${SCHEMA_INCLUDE_DIR}/executorch/backends/vulkan/serialization/" ${_vulkan_schema__srcs} WORKING_DIRECTORY ${EXECUTORCH_ROOT} + DEPENDS flatc COMMENT "Generating vulkan_schema headers" VERBATIM ) diff --git a/backends/xnnpack/CMakeLists.txt b/backends/xnnpack/CMakeLists.txt index a453b16aa58..8b3bf3d91c1 100644 --- a/backends/xnnpack/CMakeLists.txt +++ b/backends/xnnpack/CMakeLists.txt @@ -18,10 +18,6 @@ if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # Source root directory for executorch. if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) @@ -82,6 +78,7 @@ add_custom_command( ${_xnnpack_schema__srcs} COMMAND mv ${_xnnpack_flatbuffer__outputs} ${_xnnpack_schema__outputs} WORKING_DIRECTORY ${EXECUTORCH_ROOT} + DEPENDS flatc COMMENT "Generating xnnpack_schema headers" VERBATIM ) diff --git a/devtools/CMakeLists.txt b/devtools/CMakeLists.txt index 3f3a836c12b..abd33bac886 100644 --- a/devtools/CMakeLists.txt +++ b/devtools/CMakeLists.txt @@ -38,10 +38,6 @@ if(NOT PYTHON_EXECUTABLE) resolve_python_executable() endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # Paths to headers generated from the .fbs files. set(_etdump_schemas # etdump_schema_flatcc.fbs scalar_type.fbs) @@ -205,7 +201,7 @@ add_custom_command( "${_bundled_schema__include_dir}/executorch/devtools/bundled_program/schema" ${_bundled_program_schema__srcs} WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/devtools - DEPENDS ${FLATC_EXECUTABLE} ${_bundled_program_schema__srcs} + DEPENDS flatc ${_bundled_program_schema__srcs} COMMENT "Generating bundled_program headers" VERBATIM ) diff --git a/examples/apple/mps/CMakeLists.txt b/examples/apple/mps/CMakeLists.txt index 319d8159ced..06fbce8d569 100644 --- a/examples/apple/mps/CMakeLists.txt +++ b/examples/apple/mps/CMakeLists.txt @@ -18,10 +18,6 @@ if(NOT CMAKE_CXX_STANDARD) set(CMAKE_CXX_STANDARD 17) endif() -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # Source root directory for executorch. if(NOT EXECUTORCH_ROOT) set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../../..) diff --git a/extension/flat_tensor/serialize/CMakeLists.txt b/extension/flat_tensor/serialize/CMakeLists.txt index f1278c804db..d1ae797f8b3 100644 --- a/extension/flat_tensor/serialize/CMakeLists.txt +++ b/extension/flat_tensor/serialize/CMakeLists.txt @@ -9,10 +9,6 @@ # cmake-format -i CMakeLists.txt # ~~~ -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # The include directory that will contain the generated schema headers. set(_flat_tensor_schema__include_dir "${CMAKE_BINARY_DIR}/extension/flat_tensor/include") set(_flat_tensor_schema__output_dir "${_flat_tensor_schema__include_dir}/executorch/extension/flat_tensor/serialize") @@ -37,7 +33,7 @@ function(generate_flat_tensor_schema _schema_srcs _schema_name) ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --gen-mutable --scoped-enums -o "${_flat_tensor_schema__output_dir}" ${_schema_srcs} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${FLATC_EXECUTABLE} ${_schema_srcs} + DEPENDS flatc ${_schema_srcs} COMMENT "Generating ${_schema_name} headers" VERBATIM ) @@ -49,7 +45,7 @@ function(generate_flat_tensor_schema _schema_srcs _schema_name) # and some users need an alignment larger than the default, which is typically # 32. 
target_compile_definitions( - ${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=1024 + ${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT} ) target_include_directories( diff --git a/schema/CMakeLists.txt b/schema/CMakeLists.txt index 64f8821da1e..484363acdf5 100644 --- a/schema/CMakeLists.txt +++ b/schema/CMakeLists.txt @@ -9,10 +9,6 @@ # cmake-format -i CMakeLists.txt # ~~~ -if(NOT FLATC_EXECUTABLE) - set(FLATC_EXECUTABLE flatc) -endif() - # The include directory that will contain the generated schema headers. set(_program_schema__include_dir "${CMAKE_BINARY_DIR}/schema/include") set(_program_schema__output_dir "${_program_schema__include_dir}/executorch/schema") @@ -37,7 +33,7 @@ function(generate_program_schema _schema_srcs _schema_name) ${FLATC_EXECUTABLE} --cpp --cpp-std c++11 --gen-mutable --scoped-enums -o "${_program_schema__output_dir}" ${_schema_srcs} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${FLATC_EXECUTABLE} ${_schema_srcs} + DEPENDS flatc ${_schema_srcs} COMMENT "Generating ${_schema_name} headers" VERBATIM ) @@ -49,7 +45,7 @@ function(generate_program_schema _schema_srcs _schema_name) # and some users need an alignment larger than the default, which is typically # 32. target_compile_definitions( - ${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=1024 + ${_schema_name} INTERFACE FLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT} ) target_include_directories( diff --git a/setup.py b/setup.py index 28251cdf0df..32ec94708af 100644 --- a/setup.py +++ b/setup.py @@ -652,10 +652,6 @@ def run(self): build_args = [f"-j{self.parallel}"] - # TODO(dbort): Try to manage these targets and the cmake args from the - # extension entries themselves instead of hard-coding them here. - build_args += ["--target", "flatc"] - if ShouldBuild.pybindings(): cmake_args += [ "-DEXECUTORCH_BUILD_PYBIND=ON", From 1c2a69ef7f76a86c0d19ce76c25999344fed975d Mon Sep 17 00:00:00 2001 From: jathu Date: Thu, 13 Mar 2025 13:31:50 -0700 Subject: [PATCH 04/19] [build Folder Migration] Move build/Codegen.cmake (#9185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary A series of diffs as a part of https://github.com/pytorch/executorch/issues/9117. 
Move it from `./build` → `./scripts/build` ### Test plan CI cc @larryliu0820 @lucylq --- backends/cadence/CMakeLists.txt | 2 +- backends/cadence/fusion_g3/operators/CMakeLists.txt | 2 +- backends/cadence/hifi/operators/CMakeLists.txt | 2 +- backends/cadence/reference/operators/CMakeLists.txt | 2 +- configurations/CMakeLists.txt | 2 +- docs/source/kernel-library-selective-build.md | 2 +- examples/apple/mps/CMakeLists.txt | 2 +- examples/arm/CMakeLists.txt | 2 +- examples/devtools/CMakeLists.txt | 2 +- examples/mediatek/CMakeLists.txt | 2 +- examples/models/llama/runner/CMakeLists.txt | 2 +- examples/models/llava/runner/CMakeLists.txt | 2 +- examples/portable/custom_ops/CMakeLists.txt | 2 +- examples/qualcomm/CMakeLists.txt | 2 +- examples/selective_build/CMakeLists.txt | 2 +- extension/llm/custom_ops/CMakeLists.txt | 2 +- extension/llm/runner/CMakeLists.txt | 2 +- kernels/optimized/CMakeLists.txt | 2 +- kernels/portable/CMakeLists.txt | 2 +- kernels/quantized/CMakeLists.txt | 2 +- {build => scripts/build}/Codegen.cmake | 0 21 files changed, 20 insertions(+), 20 deletions(-) rename {build => scripts/build}/Codegen.cmake (100%) diff --git a/backends/cadence/CMakeLists.txt b/backends/cadence/CMakeLists.txt index 65dd5430588..a96d38e5141 100644 --- a/backends/cadence/CMakeLists.txt +++ b/backends/cadence/CMakeLists.txt @@ -28,7 +28,7 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/.. add_compile_definitions(C10_USING_CUSTOM_GENERATED_MACROS) if(EXECUTORCH_CADENCE_CPU_RUNNER) - include(${EXECUTORCH_ROOT}/build/Codegen.cmake) + include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/backends/cadence/fusion_g3/operators/CMakeLists.txt b/backends/cadence/fusion_g3/operators/CMakeLists.txt index f39614ee4f3..b3dbcae4135 100644 --- a/backends/cadence/fusion_g3/operators/CMakeLists.txt +++ b/backends/cadence/fusion_g3/operators/CMakeLists.txt @@ -12,7 +12,7 @@ if(NOT CMAKE_CXX_STANDARD) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/backends/cadence/hifi/operators/CMakeLists.txt b/backends/cadence/hifi/operators/CMakeLists.txt index 28b3c3b6aca..36469dc92e7 100644 --- a/backends/cadence/hifi/operators/CMakeLists.txt +++ b/backends/cadence/hifi/operators/CMakeLists.txt @@ -12,7 +12,7 @@ if(NOT CMAKE_CXX_STANDARD) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/backends/cadence/reference/operators/CMakeLists.txt b/backends/cadence/reference/operators/CMakeLists.txt index 69a104277fd..7d213a12813 100644 --- a/backends/cadence/reference/operators/CMakeLists.txt +++ b/backends/cadence/reference/operators/CMakeLists.txt @@ -12,7 +12,7 @@ if(NOT CMAKE_CXX_STANDARD) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/configurations/CMakeLists.txt b/configurations/CMakeLists.txt index cf304d92523..5279ffa566c 100644 --- a/configurations/CMakeLists.txt +++ b/configurations/CMakeLists.txt @@ -25,7 +25,7 @@ endif() set(_common_compile_options -Wno-deprecated-declarations) include(${EXECUTORCH_ROOT}/build/Utils.cmake) 
-include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) # Merge optimized and portable definitions, taking optimized where available. diff --git a/docs/source/kernel-library-selective-build.md b/docs/source/kernel-library-selective-build.md index 1a7562942e0..fe2d53c8be8 100644 --- a/docs/source/kernel-library-selective-build.md +++ b/docs/source/kernel-library-selective-build.md @@ -36,7 +36,7 @@ The basic flow looks like this: ## APIs -We expose a CMake macro `[gen_selected_ops](https://github.com/pytorch/executorch/blob/main/build/Codegen.cmake#L12)`, to allow users specifying op info: +We expose a CMake macro `[gen_selected_ops](https://github.com/pytorch/executorch/blob/main/scripts/build/Codegen.cmake#L12)`, to allow users specifying op info: ``` gen_selected_ops( diff --git a/examples/apple/mps/CMakeLists.txt b/examples/apple/mps/CMakeLists.txt index 06fbce8d569..d3bf0fb0321 100644 --- a/examples/apple/mps/CMakeLists.txt +++ b/examples/apple/mps/CMakeLists.txt @@ -59,7 +59,7 @@ if(NOT CMAKE_TOOLCHAIN_FILE MATCHES ".*(iOS|ios\.toolchain)\.cmake$") # portable_ops_lib include(${EXECUTORCH_ROOT}/build/Utils.cmake) - include(${EXECUTORCH_ROOT}/build/Codegen.cmake) + include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) gen_selected_ops(LIB_NAME "mps_portable_ops_lib" INCLUDE_ALL_OPS "ON") generate_bindings_for_kernels( LIB_NAME "mps_portable_ops_lib" FUNCTIONS_YAML diff --git a/examples/arm/CMakeLists.txt b/examples/arm/CMakeLists.txt index 0c754beaaaf..2f8055ce5e9 100644 --- a/examples/arm/CMakeLists.txt +++ b/examples/arm/CMakeLists.txt @@ -36,7 +36,7 @@ find_package(executorch CONFIG REQUIRED HINTS ${CMAKE_INSTALL_PREFIX}) target_include_directories(executorch INTERFACE ${_common_include_directories}) include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) # Generate C++ bindings to register kernels into both PyTorch (for AOT) and # Executorch (for runtime). Here select all ops in functions.yaml diff --git a/examples/devtools/CMakeLists.txt b/examples/devtools/CMakeLists.txt index 7ed5232ba41..9319135f8e9 100644 --- a/examples/devtools/CMakeLists.txt +++ b/examples/devtools/CMakeLists.txt @@ -23,7 +23,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/examples/mediatek/CMakeLists.txt b/examples/mediatek/CMakeLists.txt index 826a2c17fa0..d8e2a5bf667 100644 --- a/examples/mediatek/CMakeLists.txt +++ b/examples/mediatek/CMakeLists.txt @@ -20,7 +20,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/examples/models/llama/runner/CMakeLists.txt b/examples/models/llama/runner/CMakeLists.txt index b707f385f33..919bc356551 100644 --- a/examples/models/llama/runner/CMakeLists.txt +++ b/examples/models/llama/runner/CMakeLists.txt @@ -21,7 +21,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) # # The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. 
diff --git a/examples/models/llava/runner/CMakeLists.txt b/examples/models/llava/runner/CMakeLists.txt index 2d0c30a620e..7bad4a827ae 100644 --- a/examples/models/llava/runner/CMakeLists.txt +++ b/examples/models/llava/runner/CMakeLists.txt @@ -21,7 +21,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) # Let files say "include ". set(_common_include_directories ${EXECUTORCH_ROOT}/..) diff --git a/examples/portable/custom_ops/CMakeLists.txt b/examples/portable/custom_ops/CMakeLists.txt index 02736cca964..9d165d342d0 100644 --- a/examples/portable/custom_ops/CMakeLists.txt +++ b/examples/portable/custom_ops/CMakeLists.txt @@ -28,7 +28,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/examples/qualcomm/CMakeLists.txt b/examples/qualcomm/CMakeLists.txt index 0a46c061b64..c8946f63a6b 100644 --- a/examples/qualcomm/CMakeLists.txt +++ b/examples/qualcomm/CMakeLists.txt @@ -16,7 +16,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/examples/selective_build/CMakeLists.txt b/examples/selective_build/CMakeLists.txt index c2ce3f09e7a..6647f0a62b4 100644 --- a/examples/selective_build/CMakeLists.txt +++ b/examples/selective_build/CMakeLists.txt @@ -22,7 +22,7 @@ set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/../..) set(TORCH_ROOT ${EXECUTORCH_ROOT}/third-party/pytorch) include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/extension/llm/custom_ops/CMakeLists.txt b/extension/llm/custom_ops/CMakeLists.txt index eeb118d4344..6dec5d136ea 100644 --- a/extension/llm/custom_ops/CMakeLists.txt +++ b/extension/llm/custom_ops/CMakeLists.txt @@ -23,7 +23,7 @@ endif() set(_common_compile_options -Wno-deprecated-declarations -fPIC) include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) # # The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt index a9245768b9d..7adb980d224 100644 --- a/extension/llm/runner/CMakeLists.txt +++ b/extension/llm/runner/CMakeLists.txt @@ -21,7 +21,7 @@ if(NOT EXECUTORCH_ROOT) endif() include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) # # The `__srcs` lists are defined by including ${EXECUTORCH_SRCS_FILE}. 
diff --git a/kernels/optimized/CMakeLists.txt b/kernels/optimized/CMakeLists.txt index 23e26bfa72b..6ed55c73e28 100644 --- a/kernels/optimized/CMakeLists.txt +++ b/kernels/optimized/CMakeLists.txt @@ -34,7 +34,7 @@ list(APPEND _common_compile_options -DET_BUILD_WITH_BLAS) # compiling for avx2 for now punting this to come back include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/kernels/portable/CMakeLists.txt b/kernels/portable/CMakeLists.txt index e15970329c1..5072723296c 100644 --- a/kernels/portable/CMakeLists.txt +++ b/kernels/portable/CMakeLists.txt @@ -24,7 +24,7 @@ endif() set(_common_compile_options -Wno-deprecated-declarations) include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/kernels/quantized/CMakeLists.txt b/kernels/quantized/CMakeLists.txt index 6b01ba4fc27..c3c4c161b5f 100644 --- a/kernels/quantized/CMakeLists.txt +++ b/kernels/quantized/CMakeLists.txt @@ -27,7 +27,7 @@ endif() set(_common_compile_options -Wno-deprecated-declarations) include(${EXECUTORCH_ROOT}/build/Utils.cmake) -include(${EXECUTORCH_ROOT}/build/Codegen.cmake) +include(${EXECUTORCH_ROOT}/scripts/build/Codegen.cmake) if(NOT PYTHON_EXECUTABLE) resolve_python_executable() diff --git a/build/Codegen.cmake b/scripts/build/Codegen.cmake similarity index 100% rename from build/Codegen.cmake rename to scripts/build/Codegen.cmake From 1a918c779e16c0ee903a08b30c1c666d1efb2c57 Mon Sep 17 00:00:00 2001 From: Berker Soyluoglu Date: Thu, 13 Mar 2025 13:39:50 -0700 Subject: [PATCH 05/19] Move ModelDataKit to ExecuTorch directory Differential Revision: D70825994 Pull Request resolved: https://github.com/pytorch/executorch/pull/9160 --- .../ModelRunnerDataKit/ModelRuntime.swift | 14 ++++++++++ .../ModelRuntimeValueError.swift | 27 +++++++++++++++++++ .../Tensor/ModelRuntimeTensorValue.swift | 17 ++++++++++++ .../ModelRuntimeTensorValueBridging.swift | 16 +++++++++++ .../ModelRuntimeTensorValueFactory.swift | 7 +++++ .../Value/ModelRuntimeValue.swift | 22 +++++++++++++++ .../Value/ModelRuntimeValueBridging.swift | 9 +++++++ .../Value/ModelRuntimeValueFactory.swift | 8 ++++++ 8 files changed, 120 insertions(+) create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntime.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntimeValueError.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValue.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueBridging.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueFactory.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValue.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueBridging.swift create mode 100644 extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueFactory.swift diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntime.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntime.swift new file mode 100644 index 00000000000..0c0da69996c --- /dev/null +++ 
b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntime.swift @@ -0,0 +1,14 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +public enum ModelRuntimeError: Error { + case unsupportedInputType +} + +public protocol ModelRuntime { + func infer(input: [ModelRuntimeValue]) throws -> [ModelRuntimeValue] + + func getModelValueFactory() -> ModelRuntimeValueFactory + func getModelTensorFactory() -> ModelRuntimeTensorValueFactory +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntimeValueError.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntimeValueError.swift new file mode 100644 index 00000000000..c5af8e02d62 --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/ModelRuntimeValueError.swift @@ -0,0 +1,27 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +public enum ModelRuntimeValueError: Error, CustomStringConvertible { + case unsupportedType(String) + case invalidType(String, String) + + public var description: String { + switch self { + case .unsupportedType(let type): + return "Unsupported type: \(type)" + case .invalidType(let expectedType, let type): + return "Invalid type: \(type), expected \(expectedType)" + } + } +} + +@objc public class ModelRuntimeValueErrorFactory: NSObject { + @objc public class func unsupportedType(_ type: String) -> Error { + return ModelRuntimeValueError.unsupportedType(type) + } + + @objc public class func invalidType(_ actualType: String, expectedType: String) -> Error { + return ModelRuntimeValueError.invalidType(expectedType, actualType) + } +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValue.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValue.swift new file mode 100644 index 00000000000..46c8066f2bd --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValue.swift @@ -0,0 +1,17 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +public class ModelRuntimeTensorValue { + public let innerValue: ModelRuntimeTensorValueBridging + public init(innerValue: ModelRuntimeTensorValueBridging) { + self.innerValue = innerValue + } + + public func floatRepresentation() throws -> (floatArray: [Float], shape: [Int]) { + let value = try innerValue.floatRepresentation() + let data = value.floatArray + let shape = value.shape + return (data.compactMap { $0.floatValue }, shape.compactMap { $0.intValue }) + } +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueBridging.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueBridging.swift new file mode 100644 index 00000000000..6328565fc02 --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueBridging.swift @@ -0,0 +1,16 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +import Foundation + +public class ModelRuntimeTensorValueBridgingTuple: NSObject { + @objc public let floatArray: [NSNumber] + @objc public let shape: [NSNumber] + @objc public init(floatArray: [NSNumber], shape: [NSNumber]) { + self.floatArray = floatArray + self.shape = shape + } +} + +@objc public protocol ModelRuntimeTensorValueBridging { + func floatRepresentation() throws -> ModelRuntimeTensorValueBridgingTuple +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueFactory.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueFactory.swift new file mode 100644 index 00000000000..5565a807e8b --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Tensor/ModelRuntimeTensorValueFactory.swift @@ -0,0 +1,7 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +public protocol ModelRuntimeTensorValueFactory { + func createFloatTensor(value: [Float], shape: [Int]) -> ModelRuntimeTensorValue +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValue.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValue.swift new file mode 100644 index 00000000000..c27c17ec2f2 --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValue.swift @@ -0,0 +1,22 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +public class ModelRuntimeValue { + public let value: ModelRuntimeValueBridging + public init(innerValue: ModelRuntimeValueBridging) { + self.value = innerValue + } + + public func stringValue() throws -> String { + return try value.stringValue() + } + + public func tensorValue() throws -> ModelRuntimeTensorValue { + return try ModelRuntimeTensorValue(innerValue: value.tensorValue()) + } + + public func arrayValue() throws -> [ModelRuntimeValue] { + return try value.arrayValue().map { ModelRuntimeValue(innerValue: $0) } + } +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueBridging.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueBridging.swift new file mode 100644 index 00000000000..3eb4c532f4e --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueBridging.swift @@ -0,0 +1,9 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +import Foundation + +@objc public protocol ModelRuntimeValueBridging { + func stringValue() throws -> String + func tensorValue() throws -> ModelRuntimeTensorValueBridging + func arrayValue() throws -> [ModelRuntimeValueBridging] +} diff --git a/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueFactory.swift b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueFactory.swift new file mode 100644 index 00000000000..40e5ea74267 --- /dev/null +++ b/extension/apple/ModelRunnerDataKit/ModelRunnerDataKit/Value/ModelRuntimeValueFactory.swift @@ -0,0 +1,8 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +import Foundation + +public protocol ModelRuntimeValueFactory { + func createString(value: String) throws -> ModelRuntimeValue + func createTensor(value: ModelRuntimeTensorValue) throws -> ModelRuntimeValue +} From 54c3f78b696acb779550acace60ce734b34e070c Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 13 Mar 2025 14:06:43 -0700 Subject: [PATCH 06/19] Add thread_parallel_interface to all_deps for portable util (#9242) Unbreaks et_operator_library buck rule, which copies portable_lib source files into a new target that needs deps. --- kernels/portable/cpu/util/targets.bzl | 1 + 1 file changed, 1 insertion(+) diff --git a/kernels/portable/cpu/util/targets.bzl b/kernels/portable/cpu/util/targets.bzl index bf2fe042a93..95fd1734d8e 100644 --- a/kernels/portable/cpu/util/targets.bzl +++ b/kernels/portable/cpu/util/targets.bzl @@ -32,6 +32,7 @@ def define_common_targets(): "//executorch/kernels/portable/cpu/util:slice_util", "//executorch/kernels/portable/cpu/util:elementwise_util", "//executorch/kernels/portable/cpu/util:upsample_util", + "//executorch/runtime/kernel:thread_parallel_interface", ], visibility = ["//executorch/...", "@EXECUTORCH_CLIENTS"], ) From 2407647e729aad43f45d29bfc3c85751bbdc2de7 Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 13 Mar 2025 14:11:04 -0700 Subject: [PATCH 07/19] Put extension/parallel buck files back (#9232) I shouldn't have deleted these, for the same reason I left thread_parallel.h behind. --- extension/parallel/TARGETS | 8 ++++++++ extension/parallel/targets.bzl | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 extension/parallel/TARGETS create mode 100644 extension/parallel/targets.bzl diff --git a/extension/parallel/TARGETS b/extension/parallel/TARGETS new file mode 100644 index 00000000000..2341af9282f --- /dev/null +++ b/extension/parallel/TARGETS @@ -0,0 +1,8 @@ +# Any targets that should be shared between fbcode and xplat must be defined in +# targets.bzl. This file can contain fbcode-only targets. + +load(":targets.bzl", "define_common_targets") + +oncall("executorch") + +define_common_targets() diff --git a/extension/parallel/targets.bzl b/extension/parallel/targets.bzl new file mode 100644 index 00000000000..dbfb3ff160c --- /dev/null +++ b/extension/parallel/targets.bzl @@ -0,0 +1,22 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "get_aten_mode_options", "runtime") + +def define_common_targets(): + """Defines targets that should be shared between fbcode and xplat. + + The directory containing this targets.bzl file should also contain both + TARGETS and BUCK files that call this function. 
+ """ + + runtime.cxx_library( + name = "thread_parallel", + exported_headers = [ + "thread_parallel.h", + ], + visibility = [ + "//executorch/...", + "@EXECUTORCH_CLIENTS", + ], + deps = [ + "//executorch/runtime/kernel:thread_parallel_interface", + ], + ) From 4f95fd020bd0d1c6f97f7de402840de5108c3858 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 17:46:30 -0400 Subject: [PATCH 08/19] Add back linux pull jobs (#9239) Looks like I lied when I said https://github.com/pytorch/executorch/pull/9227 was a no-op Adding back pull jobs for linux x86 --- .github/workflows/pull.yml | 57 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index cc4b04f197a..91699d639f3 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -38,8 +38,8 @@ jobs: # Build and test ExecuTorch with the add model on portable backend. PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "add" "${BUILD_TOOL}" "portable" - test-models-linux: - name: test-models-linux + test-models-linux-basic: + name: test-models-linux-basic uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: id-token: write @@ -70,6 +70,59 @@ jobs: # Build and test ExecuTorch PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" + test-models-linux: + name: test-models-linux + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read + strategy: + matrix: + model: [linear, add, add_mul, ic3, mv2, resnet18, resnet50, mobilebert, emformer_transcribe] + backend: [portable, xnnpack-quantization-delegation] + runner: [linux.2xlarge] + include: + - model: ic4 + backend: portable + runner: linux.4xlarge.memory + - model: ic4 + backend: xnnpack-quantization-delegation + runner: linux.4xlarge.memory + - model: emformer_join + backend: portable + runner: linux.4xlarge.memory + - model: emformer_join + backend: xnnpack-quantization-delegation + runner: linux.4xlarge.memory + - model: phi-4-mini + backend: portable + runner: linux.4xlarge.memory + - model: llama3_2_vision_encoder + backend: portable + runner: linux.4xlarge.memory + - model: w2l + backend: portable + runner: linux.4xlarge.memory + fail-fast: false + with: + runner: ${{ matrix.runner }} + docker-image: executorch-ubuntu-22.04-clang12 + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 90 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + MODEL_NAME=${{ matrix.model }} + BUILD_TOOL=cmake + BACKEND=${{ matrix.backend }} + + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # Build and test ExecuTorch + PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" + test-llama-runner-linux: name: test-llama-runner-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main From 630d0cc6ed6d20d81e4c2d725bfeff25fa270c8d Mon Sep 17 00:00:00 2001 From: Sam Gondelman Date: Thu, 13 Mar 2025 14:49:08 -0700 Subject: [PATCH 09/19] Don't use designated initializers in QueryPool.cpp Differential Revision: D70933388 Pull Request resolved: https://github.com/pytorch/executorch/pull/9116 --- backends/vulkan/runtime/vk_api/QueryPool.cpp | 27 
++++++++++---------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/backends/vulkan/runtime/vk_api/QueryPool.cpp b/backends/vulkan/runtime/vk_api/QueryPool.cpp index b029cea7081..2f6d433b887 100644 --- a/backends/vulkan/runtime/vk_api/QueryPool.cpp +++ b/backends/vulkan/runtime/vk_api/QueryPool.cpp @@ -185,19 +185,20 @@ std::vector QueryPool::get_shader_timestamp_data() { std::vector shader_result; for (ShaderDuration& entry : shader_durations_) { shader_result.push_back(ShaderResult{ - .kernel_name = entry.kernel_name, - .dispatch_id = entry.dispatch_id, - .start_time_ns = entry.start_time_ns, - .end_time_ns = entry.end_time_ns, - .metadata = ShaderMetadata{ - .global_workgroup_size = - {entry.global_workgroup_size.width, - entry.global_workgroup_size.height, - entry.global_workgroup_size.depth}, - .local_workgroup_size = - {entry.local_workgroup_size.width, - entry.local_workgroup_size.height, - entry.local_workgroup_size.depth}, + /* .kernel_name = */ entry.kernel_name, + /* .dispatch_id = */ entry.dispatch_id, + /* .start_time_ns = */ entry.start_time_ns, + /* .end_time_ns = */ entry.end_time_ns, + /* .metadata = */ + ShaderMetadata{ + /* .global_workgroup_size = */ + {entry.global_workgroup_size.width, + entry.global_workgroup_size.height, + entry.global_workgroup_size.depth}, + /* .local_workgroup_size = */ + {entry.local_workgroup_size.width, + entry.local_workgroup_size.height, + entry.local_workgroup_size.depth}, }}); } return shader_result; From a131826b5e748735312ca479064387f13f354baa Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 13 Mar 2025 15:26:36 -0700 Subject: [PATCH 10/19] [Benchmark]Deprecate v2 (#9238) Issue: https://github.com/pytorch/test-infra/issues/6294 Remove benchmark v2 schema logics, still keep the way to store v3 with v3 folder, since we might have higher version of schema in the future next step is introduce the failure handling for benchmark record --- .github/scripts/extract_benchmark_results.py | 183 +++++-------------- .github/workflows/android-perf.yml | 21 +-- .github/workflows/apple-perf.yml | 21 +-- 3 files changed, 54 insertions(+), 171 deletions(-) diff --git a/.github/scripts/extract_benchmark_results.py b/.github/scripts/extract_benchmark_results.py index ba6142a4826..77c73eab0b4 100755 --- a/.github/scripts/extract_benchmark_results.py +++ b/.github/scripts/extract_benchmark_results.py @@ -86,36 +86,6 @@ def parse_args() -> Any: action=ValidateDir, help="the directory to keep the benchmark results", ) - parser.add_argument( - "--repo", - type=str, - required=True, - help="which GitHub repo this workflow run belongs to", - ) - parser.add_argument( - "--head-branch", - type=str, - required=True, - help="the head branch that runs", - ) - parser.add_argument( - "--workflow-name", - type=str, - required=True, - help="the name of the benchmark workflow", - ) - parser.add_argument( - "--workflow-run-id", - type=int, - required=True, - help="the id of the benchmark workflow", - ) - parser.add_argument( - "--workflow-run-attempt", - type=int, - required=True, - help="which retry of the workflow this is", - ) parser.add_argument( "--benchmark-configs", type=str, @@ -153,9 +123,10 @@ def extract_android_benchmark_results( # This is to handle the case where there is no benchmark results warning(f"Fail to load the benchmark results from {artifact_s3_url}") return [] + return [] -def initialize_ios_metadata(test_name: str) -> Dict[str, any]: +def initialize_ios_metadata(test_name: str) -> Dict[str, Any]: """ Extract the benchmark metadata 
from the test name, for example: test_forward_llama2_pte_iOS_17_2_1_iPhone15_4 @@ -364,14 +335,7 @@ def transform( app_type: str, benchmark_results: List, benchmark_config: Dict[str, str], - repo: str, - head_branch: str, - workflow_name: str, - workflow_run_id: int, - workflow_run_attempt: int, job_name: str, - job_id: int, - schema_version: str, ) -> List: """ Transform the benchmark results into the format writable into the benchmark database @@ -381,87 +345,51 @@ def transform( for r in benchmark_results: r["deviceInfo"]["device"] = job_name - if schema_version == "v2": - # TODO (huydhn): Clean up this branch after ExecuTorch dashboard migrates to v3 - return [ - { - # GH-info to identify where the benchmark is run - "repo": repo, - "head_branch": head_branch, - "workflow_id": workflow_run_id, - "run_attempt": workflow_run_attempt, - "job_id": job_id, - # The model - "name": f"{r['benchmarkModel']['name']} {r['benchmarkModel'].get('backend', '')}".strip(), - "dtype": ( - r["benchmarkModel"]["quantization"] - if r["benchmarkModel"]["quantization"] - else "unknown" - ), - # The metric value - "metric": r["metric"], - "actual": r["actualValue"], - "target": r["targetValue"], - # The device - "device": r["deviceInfo"]["device"], - "arch": r["deviceInfo"].get("os", ""), - # Not used here, just set it to something unique here - "filename": workflow_name, - "test_name": app_type, - "runner": job_name, - } - for r in benchmark_results - ] - elif schema_version == "v3": - v3_benchmark_results = [] - # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database - return [ - { - "benchmark": { - "name": "ExecuTorch", - "mode": "inference", - "extra_info": { - "app_type": app_type, - # Just keep a copy of the benchmark config here - "benchmark_config": json.dumps(benchmark_config), - }, - }, - "model": { - "name": benchmark_config.get("model", r["benchmarkModel"]["name"]), - "type": "OSS model", - "backend": benchmark_config.get( - "config", r["benchmarkModel"].get("backend", "") - ), + # From https://github.com/pytorch/pytorch/wiki/How-to-integrate-with-PyTorch-OSS-benchmark-database + return [ + { + "benchmark": { + "name": "ExecuTorch", + "mode": "inference", + "extra_info": { + "app_type": app_type, + # Just keep a copy of the benchmark config here + "benchmark_config": json.dumps(benchmark_config), }, - "metric": { - "name": r["metric"], - "benchmark_values": [r["actualValue"]], - "target_value": r["targetValue"], - "extra_info": { - "method": r.get("method", ""), - }, + }, + "model": { + "name": benchmark_config.get("model", r["benchmarkModel"]["name"]), + "type": "OSS model", + "backend": benchmark_config.get( + "config", r["benchmarkModel"].get("backend", "") + ), + }, + "metric": { + "name": r["metric"], + "benchmark_values": [r["actualValue"]], + "target_value": r["targetValue"], + "extra_info": { + "method": r.get("method", ""), }, - "runners": [ - { - "name": r["deviceInfo"]["device"], - "type": r["deviceInfo"]["os"], - "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""), - "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""), - } - ], - } - for r in benchmark_results - ] + }, + "runners": [ + { + "name": r["deviceInfo"]["device"], + "type": r["deviceInfo"]["os"], + "avail_mem_in_gb": r["deviceInfo"].get("availMem", ""), + "total_mem_in_gb": r["deviceInfo"].get("totalMem", ""), + } + ], + } + for r in benchmark_results + ] def main() -> None: args = parse_args() # Across all devices, keeping both schemas for now until ExecuTorch dashboard 
migrates to v3 - all_benchmark_results = { - "v2": [], - "v3": [], - } + all_benchmark_results = [] benchmark_config = {} with open(args.artifacts) as f: @@ -482,7 +410,7 @@ def main() -> None: benchmark_config = read_benchmark_config( artifact_s3_url, args.benchmark_configs ) - + benchmark_results = [] if app_type == "ANDROID_APP": benchmark_results = extract_android_benchmark_results( job_name, artifact_type, artifact_s3_url @@ -494,32 +422,17 @@ def main() -> None: ) if benchmark_results: - for schema in all_benchmark_results.keys(): - results = transform( - app_type, - benchmark_results, - benchmark_config, - args.repo, - args.head_branch, - args.workflow_name, - args.workflow_run_id, - args.workflow_run_attempt, - job_name, - extract_job_id(args.artifacts), - schema, - ) - all_benchmark_results[schema].extend(results) - - for schema in all_benchmark_results.keys(): - if not all_benchmark_results.get(schema): - continue - - output_dir = os.path.join(args.output_dir, schema) - os.makedirs(output_dir, exist_ok=True) + results = transform( + app_type, benchmark_results, benchmark_config, job_name + ) + all_benchmark_results.extend(results) + # add v3 in case we have higher version of schema + output_dir = os.path.join(args.output_dir, "v3") + os.makedirs(output_dir, exist_ok=True) output_file = os.path.basename(args.artifacts) with open(f"{output_dir}/{output_file}", "w") as f: - json.dump(all_benchmark_results[schema], f) + json.dump(all_benchmark_results, f) if __name__ == "__main__": diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml index f21ed849d03..fbd2cae24e0 100644 --- a/.github/workflows/android-perf.yml +++ b/.github/workflows/android-perf.yml @@ -462,29 +462,14 @@ jobs: ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ --artifacts "${ARTIFACTS_BY_JOB}" \ --output-dir benchmark-results \ - --repo ${{ github.repository }} \ - --head-branch ${{ github.head_ref || github.ref_name }} \ - --workflow-name "${{ github.workflow }}" \ - --workflow-run-id ${{ github.run_id }} \ - --workflow-run-attempt ${{ github.run_attempt }} \ --benchmark-configs benchmark-configs done - for SCHEMA in v2 v3; do - for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do - cat "${BENCHMARK_RESULTS}" - echo - done + for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do + cat "${BENCHMARK_RESULTS}" + echo done - # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration - - name: Upload the benchmark results (v2) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v2 - dry-run: false - schema-version: v2 - - name: Upload the benchmark results (v3) uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml index 83778d36c1b..1cf7e67f007 100644 --- a/.github/workflows/apple-perf.yml +++ b/.github/workflows/apple-perf.yml @@ -521,29 +521,14 @@ jobs: ${CONDA_RUN} python .github/scripts/extract_benchmark_results.py \ --artifacts "${ARTIFACTS_BY_JOB}" \ --output-dir benchmark-results \ - --repo ${{ github.repository }} \ - --head-branch ${{ github.head_ref || github.ref_name }} \ - --workflow-name "${{ github.workflow }}" \ - --workflow-run-id ${{ github.run_id }} \ - --workflow-run-attempt ${{ github.run_attempt }} \ --benchmark-configs benchmark-configs done - for SCHEMA in v2 v3; do - for BENCHMARK_RESULTS in benchmark-results/"${SCHEMA}"/*.json; do - 
cat "${BENCHMARK_RESULTS}" - echo - done + for BENCHMARK_RESULTS in benchmark-results/v3/*.json; do + cat "${BENCHMARK_RESULTS}" + echo done - # TODO (huydhn): Remove v2 schema once the benchmark dashboard finishes the migration - - name: Upload the benchmark results (v2) - uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main - with: - benchmark-results-dir: benchmark-results/v2 - dry-run: false - schema-version: v2 - - name: Upload the benchmark results (v3) uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main with: From e91c0854ca42f84c8e25cfc2f680eb26639f8f14 Mon Sep 17 00:00:00 2001 From: Christian Lang Date: Thu, 13 Mar 2025 18:44:10 -0400 Subject: [PATCH 11/19] Adding dummy coreml backend to silence uquery failures Differential Revision: D70645217 Pull Request resolved: https://github.com/pytorch/executorch/pull/9230 --- backends/apple/coreml/TARGETS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS index df1165dd74e..a8802e99b56 100644 --- a/backends/apple/coreml/TARGETS +++ b/backends/apple/coreml/TARGETS @@ -5,6 +5,14 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") oncall("executorch") +# TODO: this is a placeholder to support internal fbcode build. We should add the coreml backend target properly. +runtime.python_library( + name = "coreml", + visibility = [ + "@EXECUTORCH_CLIENTS", + ], +) + runtime.python_library( name = "backend", srcs = glob([ From ce612b8c90356feba144e6cfec92d545c908c136 Mon Sep 17 00:00:00 2001 From: "Mengtao (Martin) Yuan" Date: Thu, 13 Mar 2025 15:44:18 -0700 Subject: [PATCH 12/19] ping a newer pytorch nightly to include recent export updates Differential Revision: D71092194 Pull Request resolved: https://github.com/pytorch/executorch/pull/9216 --- install_requirements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install_requirements.py b/install_requirements.py index 06dfbd9e9a6..9353dad180e 100644 --- a/install_requirements.py +++ b/install_requirements.py @@ -67,7 +67,7 @@ def python_is_compatible(): # NOTE: If a newly-fetched version of the executorch repo changes the value of # NIGHTLY_VERSION, you should re-run this script to install the necessary # package versions. -NIGHTLY_VERSION = "dev20250301" +NIGHTLY_VERSION = "dev20250311" def install_requirements(use_pytorch_nightly): From e9cf64a7f6816bf4380a2cd68320e41f22be03ba Mon Sep 17 00:00:00 2001 From: Scott Wolchok Date: Thu, 13 Mar 2025 16:06:20 -0700 Subject: [PATCH 13/19] fix building with CMake + Ninja after #9077 (#9246) Needed to tell CMake to tell Ninja where flatc comes from. 
--- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 54e2e1ebfbf..3385bfb6d39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -509,6 +509,7 @@ if(EXECUTORCH_BUILD_FLATC) -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-DFLATBUFFERS_MAX_ALIGNMENT=${FLATBUFFERS_MAX_ALIGNMENT}" INSTALL_COMMAND "" + BUILD_BYPRODUCTS /flatc ) ExternalProject_Get_Property(flatbuffers BINARY_DIR) set(FLATC_EXECUTABLE ${BINARY_DIR}/flatc) From b5d8e3b6ee3020fefc944f14fd21db5193b92f2d Mon Sep 17 00:00:00 2001 From: Berker Soyluoglu Date: Thu, 13 Mar 2025 16:06:48 -0700 Subject: [PATCH 14/19] Move ExecutorchRuntimeValueSupport and ExecutorchRuntimeBridge to xplat Differential Revision: D70825991 Pull Request resolved: https://github.com/pytorch/executorch/pull/9244 --- .../Data/ExecutorchRuntimeTensorValue.h | 27 +++++ .../Data/ExecutorchRuntimeTensorValue.mm | 100 ++++++++++++++++ .../Exported/Data/ExecutorchRuntimeValue.h | 28 +++++ .../Exported/Data/ExecutorchRuntimeValue.mm | 73 ++++++++++++ .../Exported/ExecutorchRuntimeEngine.h | 23 ++++ .../Exported/ExecutorchRuntimeEngine.mm | 107 ++++++++++++++++++ .../__tests__/ExecutorchRuntimeEngineTests.mm | 61 ++++++++++ .../__tests__/ExecutorchRuntimeValueTests.mm | 67 +++++++++++ .../ExecutorchRuntimeValueSupport.swift | 39 +++++++ .../ExecutorchRuntimeValueSupportTests.swift | 42 +++++++ 10 files changed, 567 insertions(+) create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.h create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.mm create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.h create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.mm create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.h create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.mm create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeEngineTests.mm create mode 100644 extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeValueTests.mm create mode 100644 extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport.swift create mode 100644 extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/__tests__/ExecutorchRuntimeValueSupportTests.swift diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.h b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.h new file mode 100644 index 00000000000..c0c7dfbc49f --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.h @@ -0,0 +1,27 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +#ifdef __cplusplus + #import + #import +#endif +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface ExecutorchRuntimeTensorValue : NSObject + +- (instancetype)init NS_UNAVAILABLE; ++ (instancetype)new NS_UNAVAILABLE; + +- (instancetype)initWithFloatArray:(NSArray *)floatArray shape:(NSArray *)sizes NS_SWIFT_NAME(init(floatArray:shape:)); + +#ifdef __cplusplus +- (nullable instancetype)initWithTensor:(torch::executor::Tensor)tensor error:(NSError * _Nullable * _Nullable)error; +- (instancetype)initWithData:(std::vector)floatData + shape:(std::vector)shape NS_DESIGNATED_INITIALIZER; +- (torch::executor::Tensor)backedValue; +#endif + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.mm b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.mm new file mode 100644 index 00000000000..933bbe99e57 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeTensorValue.mm @@ -0,0 +1,100 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#import "ExecutorchRuntimeTensorValue.h" + +#import + +#import + +using torch::executor::TensorImpl; +using torch::executor::ScalarType; + +@implementation ExecutorchRuntimeTensorValue +{ + std::unique_ptr _tensor; + // TensorImpl DOES NOT take ownership. + // This float vector is what keeps the data in memory. + std::vector _floatData; + std::vector _shape; +} + +- (instancetype)initWithData:(std::vector)floatData + shape:(std::vector)shape +{ + if (self = [super init]) { + _floatData.assign(floatData.begin(), floatData.end()); + _shape.assign(shape.begin(), shape.end()); + _tensor = std::make_unique(ScalarType::Float, std::size(_shape), _shape.data(), _floatData.data()); + } + return self; +} + +- (instancetype)initWithFloatArray:(NSArray *)floatArray shape:(NSArray *)shape +{ + std::vector floatVector; + std::vector shapeVector; + + floatVector.reserve(floatArray.count); + for (int i = 0; i < floatArray.count; i++) { + floatVector.push_back([floatArray[i] floatValue]); + } + shapeVector.reserve(shape.count); + for (int i = 0; i < shape.count; i++) { + shapeVector.push_back([shape[i] intValue]); + } + + return [self initWithData:floatVector shape:shapeVector]; +} + +- (nullable instancetype)initWithTensor:(torch::executor::Tensor)tensor error:(NSError * _Nullable * _Nullable)error +{ + if (tensor.scalar_type() != ScalarType::Float) { + if (error) { + *error = [ModelRuntimeValueErrorFactory invalidType:[NSString stringWithFormat:@"torch::executor::ScalarType::%hhd", tensor.scalar_type()] expectedType:@"torch::executor::ScalarType::Float"]; + } + return nil; + } + + std::vector floatVector; + std::vector shapeVector; + shapeVector.assign(tensor.sizes().begin(), tensor.sizes().end()); + floatVector.assign(tensor.const_data_ptr(), tensor.const_data_ptr() + tensor.numel()); + return [self initWithData:floatVector shape:shapeVector]; +} + +- (nullable ModelRuntimeTensorValueBridgingTuple *)floatRepresentationAndReturnError:(NSError * _Nullable * _Nullable)error +{ + if (_tensor->scalar_type() == torch::executor::ScalarType::Float) { + const auto *tensorPtr = _tensor->data(); + const auto sizes = _tensor->sizes(); + std::vector tensorVec(tensorPtr, tensorPtr + _tensor->numel()); + std::vector tensorSizes(sizes.begin(), sizes.end()); + + NSMutableArray *floatArray = [[NSMutableArray alloc] initWithCapacity:tensorVec.size()]; + for 
(float &i : tensorVec) { + [floatArray addObject:@(i)]; + } + + NSMutableArray *sizesArray = [[NSMutableArray alloc] initWithCapacity:tensorSizes.size()]; + for (int &tensorSize : tensorSizes) { + [sizesArray addObject:@(tensorSize)]; + } + + return [[ModelRuntimeTensorValueBridgingTuple alloc] initWithFloatArray:floatArray shape:sizesArray]; + } + + if (error) { + *error = [ModelRuntimeValueErrorFactory + invalidType:[NSString stringWithFormat:@"torch::executor::ScalarType::%hhd", _tensor->scalar_type()] + expectedType:@"torch::executor::ScalarType::Float"]; + } + + return nil; +} + +- (torch::executor::Tensor)backedValue +{ + return torch::executor::Tensor(_tensor.get()); +} + +@end diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.h b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.h new file mode 100644 index 00000000000..591511b2b11 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.h @@ -0,0 +1,28 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#ifdef __cplusplus + #import + #import +#endif + +#import + +#import "ExecutorchRuntimeTensorValue.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface ExecutorchRuntimeValue : NSObject + +- (instancetype)init NS_UNAVAILABLE; ++ (instancetype)new NS_UNAVAILABLE; + +- (instancetype)initWithTensor:(ExecutorchRuntimeTensorValue *)tensorValue; + +#ifdef __cplusplus +- (instancetype)initWithEValue:(torch::executor::EValue)value NS_DESIGNATED_INITIALIZER; +- (torch::executor::EValue)getBackedValue; +#endif + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.mm b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.mm new file mode 100644 index 00000000000..f8fb8c4a419 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/Data/ExecutorchRuntimeValue.mm @@ -0,0 +1,73 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#import "ExecutorchRuntimeValue.h" + +#import +#import + +#import "ExecutorchRuntimeTensorValue.h" + +using torch::executor::EValue; + +@implementation ExecutorchRuntimeValue +{ + EValue _value; + // IMPORTANT + // Tensor value keeps a reference to the original tensor value. However, the value that is wrapped by LiteInterpreterRuntimeTensorValue DOES NOT TAKE OWNERSHIP OF THE RAW DATA! + // This means once the wrapper is deallocated, the tensor value will be deallocated as well. + // This reference here is to keep the tensor value alive until the runtime is deallocated. 
+ ExecutorchRuntimeTensorValue *_tensorValue; +} + +- (instancetype)initWithEValue:(EValue)value +{ + if (self = [super init]) { + _value = value; + } + return self; +} + +- (instancetype)initWithTensor:(ExecutorchRuntimeTensorValue *)tensorValue +{ + if (self = [self initWithEValue:EValue([tensorValue backedValue])]) { + _tensorValue = tensorValue; + } + return self; +} + +- (nullable NSString *)stringValueAndReturnError:(NSError * _Nullable * _Nullable)error +{ + if (error) { + *error = [ModelRuntimeValueErrorFactory unsupportedType:@"ExecutorchRuntimeValue doesn't support strings"]; + } + return nil; +} + +- (nullable id)tensorValueAndReturnError:(NSError * _Nullable * _Nullable)error +{ + if (_value.isTensor()) { + return [[ExecutorchRuntimeTensorValue alloc] initWithTensor:_value.toTensor() error:error]; + } + + if (error) { + *error = [ModelRuntimeValueErrorFactory + invalidType:[NSString stringWithFormat:@"Tag::%d", _value.tag] + expectedType:@"Tag::Tensor"]; + } + return nil; +} + +- (EValue)getBackedValue +{ + return _value; +} + +- (NSArray> *)arrayValueAndReturnError:(NSError * _Nullable * _Nullable)error +{ + if (error) { + *error = [ModelRuntimeValueErrorFactory unsupportedType:@"EValue doesn't support arrays"]; + } + return nil; +} + +@end diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.h b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.h new file mode 100644 index 00000000000..be965c87a6f --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.h @@ -0,0 +1,23 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +#import + +#import "ExecutorchRuntimeValue.h" + +NS_ASSUME_NONNULL_BEGIN + +@interface ExecutorchRuntimeEngine : NSObject + +- (nonnull instancetype)init NS_UNAVAILABLE; ++ (nonnull instancetype)new NS_UNAVAILABLE; + +- (nullable instancetype)initWithModelPath:(NSString *)modelPath + modelMethodName:(NSString *)modelMethodName + error:(NSError * _Nullable * _Nullable)error NS_DESIGNATED_INITIALIZER; + +- (nullable NSArray *)infer:(NSArray *)input + error:(NSError * _Nullable * _Nullable)error NS_SWIFT_NAME(infer(input:)); + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.mm b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.mm new file mode 100644 index 00000000000..45a527bd1c0 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/Exported/ExecutorchRuntimeEngine.mm @@ -0,0 +1,107 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +#import "ExecutorchRuntimeEngine.h" + +#import +#import + +#import + +static int kInitFailed = 0; +static int kInferenceFailed = 1; + +static auto NSStringToString(NSString *string) -> std::string +{ + const char *cStr = [string cStringUsingEncoding:NSUTF8StringEncoding]; + if (cStr) { + return cStr; + } + + NSData *data = [string dataUsingEncoding:NSUTF8StringEncoding allowLossyConversion:NO]; + return {reinterpret_cast([data bytes]), [data length]}; +} + +static auto StringToNSString(const std::string &string) -> NSString * +{ + CFStringRef cfString = CFStringCreateWithBytes( + kCFAllocatorDefault, + reinterpret_cast(string.c_str()), + string.size(), + kCFStringEncodingUTF8, + false + ); + return (__bridge_transfer NSString *)cfString; +} + +@implementation ExecutorchRuntimeEngine +{ + NSString *_modelPath; + NSString *_modelMethodName; + std::unique_ptr _module; +} + +- (instancetype)initWithModelPath:(NSString *)modelPath + modelMethodName:(NSString *)modelMethodName + error:(NSError * _Nullable * _Nullable)error +{ + if (self = [super init]) { + _modelPath = modelPath; + _modelMethodName = modelMethodName; + try { + _module = std::make_unique(NSStringToString(modelPath)); + const auto e = _module->load_method(NSStringToString(modelMethodName)); + if (e != executorch::runtime::Error::Ok) { + if (error) { + *error = [NSError errorWithDomain:@"ExecutorchRuntimeEngine" + code:kInitFailed + userInfo:@{NSDebugDescriptionErrorKey : StringToNSString(std::to_string(static_cast(e)))}]; + } + return nil; + } + } catch (...) { + if (error) { + *error = [NSError errorWithDomain:@"ExecutorchRuntimeEngine" + code:kInitFailed + userInfo:@{NSDebugDescriptionErrorKey : @"Unknown error"}]; + } + return nil; + } + } + return self; +} + +- (nullable NSArray *)infer:(NSArray *)input + error:(NSError * _Nullable * _Nullable)error +{ + try { + std::vector inputEValues; + inputEValues.reserve(input.count); + for (ExecutorchRuntimeValue *inputValue in input) { + inputEValues.push_back([inputValue getBackedValue]); + } + const auto result = _module->execute(NSStringToString(_modelMethodName), inputEValues); + if (!result.ok()) { + const auto executorchError = static_cast(result.error()); + if (error) { + *error = [NSError errorWithDomain:@"ExecutorchRuntimeEngine" + code:kInferenceFailed + userInfo:@{NSDebugDescriptionErrorKey : StringToNSString(std::to_string(executorchError))}]; + } + return nil; + } + NSMutableArray *const resultValues = [NSMutableArray new]; + for (const auto &evalue : result.get()) { + [resultValues addObject:[[ExecutorchRuntimeValue alloc] initWithEValue:evalue]]; + } + return resultValues; + } catch (...) { + if (error) { + *error = [NSError errorWithDomain:@"LiteInterpreterRuntimeEngine" + code:kInferenceFailed + userInfo:@{NSDebugDescriptionErrorKey : @"Unknown error"}]; + } + return nil; + } +} + +@end diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeEngineTests.mm b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeEngineTests.mm new file mode 100644 index 00000000000..de59902dfca --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeEngineTests.mm @@ -0,0 +1,61 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +#import + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface ExecutorchRuntimeEngineTests : XCTestCase +@end + +@implementation ExecutorchRuntimeEngineTests + +- (void)testInvalidModel +{ + NSString *const modelPath = @"invalid_model_path"; + + NSError *runtimeInitError = nil; + ExecutorchRuntimeEngine *const engine = [[ExecutorchRuntimeEngine alloc] initWithModelPath:modelPath modelMethodName:@"forward" error:&runtimeInitError]; + XCTAssertNil(engine); + XCTAssertNotNil(runtimeInitError); + + XCTAssertEqual(runtimeInitError.code, 0); + XCTAssertEqualObjects(runtimeInitError.userInfo[NSDebugDescriptionErrorKey], @"34"); + // 34 is the code for AccessFailed. +} + +- (void)testValidModel +{ + NSBundle *const bundle = [NSBundle bundleForClass:[self class]]; + // This is a simple model that adds two tensors. + NSString *const modelPath = [bundle pathForResource:@"add" ofType:@"pte"]; + NSError *runtimeInitError = nil; + ExecutorchRuntimeEngine *const engine = [[ExecutorchRuntimeEngine alloc] initWithModelPath:modelPath modelMethodName:@"forward" error:&runtimeInitError]; + XCTAssertNotNil(engine); + XCTAssertNil(runtimeInitError); + + ExecutorchRuntimeTensorValue *inputTensor = [[ExecutorchRuntimeTensorValue alloc] initWithFloatArray:@[@2.0] shape:@[@1]]; + ExecutorchRuntimeValue *inputValue = [[ExecutorchRuntimeValue alloc] initWithTensor:inputTensor]; + + NSError *inferenceError = nil; + const auto output = [engine infer:@[inputValue, inputValue] error:&inferenceError]; + XCTAssertNil(inferenceError); + + XCTAssertEqual(output.count, 1); + NSError *tensorValueError = nil; + NSError *floatRepresentationError = nil; + const auto resultTensorValue = [[output.firstObject tensorValueAndReturnError:&tensorValueError] + floatRepresentationAndReturnError:&floatRepresentationError]; + + XCTAssertNil(tensorValueError); + XCTAssertNil(floatRepresentationError); + XCTAssertEqual(resultTensorValue.floatArray.count, 1); + XCTAssertEqual(resultTensorValue.shape.count, 1); + XCTAssertEqual(resultTensorValue.floatArray.firstObject.floatValue, 4.0); + XCTAssertEqual(resultTensorValue.shape.firstObject.integerValue, 1); +} + +@end + +NS_ASSUME_NONNULL_END diff --git a/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeValueTests.mm b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeValueTests.mm new file mode 100644 index 00000000000..742cfb8d40d --- /dev/null +++ b/extension/apple/ExecutorchRuntimeBridge/ExecutorchRuntimeBridge/__tests__/ExecutorchRuntimeValueTests.mm @@ -0,0 +1,67 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. 
+ +#import + +#import +#import +#import + +using torch::executor::EValue; +using torch::executor::TensorImpl; +using torch::executor::ScalarType; + +@interface ExecutorchRuntimeValueTests : XCTestCase +@end + +@implementation ExecutorchRuntimeValueTests + +- (void)testStringValueWithError +{ + ExecutorchRuntimeValue *value = [[ExecutorchRuntimeValue alloc] initWithEValue:EValue((int64_t)1)]; + XCTAssertNil([value stringValueAndReturnError:nil]); + NSError *error = nil; + XCTAssertNil([value stringValueAndReturnError:&error]); + XCTAssertNotNil(error); + XCTAssertEqualObjects([error description], @"Unsupported type: ExecutorchRuntimeValue doesn't support strings"); +} + +- (void)testTensorValue +{ + NSMutableArray *data = [NSMutableArray new]; + for (int i = 0; i < 10; i++) { + [data addObject:@(i + 0.5f)]; + } + + NSArray *shape = @[@(10)]; + + ExecutorchRuntimeTensorValue *tensorValue = [[ExecutorchRuntimeTensorValue alloc] initWithFloatArray:data shape:shape]; + + const auto tuple = [tensorValue floatRepresentationAndReturnError:nil]; + XCTAssertEqualObjects(tuple.floatArray, data); + XCTAssertEqualObjects(tuple.shape, shape); +} + +- (void)testTensorValueWithFloatArrayWithError +{ + std::vector data = {1, 2, 3}; + std::vector shape = {3}; + TensorImpl tensorImpl(ScalarType::Int, std::size(shape), shape.data(), data.data()); + + XCTAssertNil([[ExecutorchRuntimeTensorValue alloc] initWithTensor:*new torch::executor::Tensor(&tensorImpl) error:nil]); + NSError *error = nil; + XCTAssertNil([[ExecutorchRuntimeTensorValue alloc] initWithTensor:*new torch::executor::Tensor(&tensorImpl) error:&error]); + XCTAssertNotNil(error); + XCTAssertEqualObjects([error description], @"Invalid type: torch::executor::ScalarType::3, expected torch::executor::ScalarType::Float"); +} + +- (void)testTensorValueWithError +{ + ExecutorchRuntimeValue *value = [[ExecutorchRuntimeValue alloc] initWithEValue:EValue((int64_t)1)]; + XCTAssertNil([value tensorValueAndReturnError:nil]); + NSError *error = nil; + XCTAssertNil([value tensorValueAndReturnError:&error]); + XCTAssertNotNil(error); + XCTAssertEqualObjects([error description], @"Invalid type: Tag::4, expected Tag::Tensor"); +} + +@end diff --git a/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport.swift b/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport.swift new file mode 100644 index 00000000000..3fa2f590d85 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport.swift @@ -0,0 +1,39 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +@_implementationOnly import ExecutorchRuntimeBridge +import Foundation +import ModelRunnerDataKit + +public struct ExecutorchRuntimeValueSupport { + + public init() {} +} + +extension ExecutorchRuntimeValueSupport: ModelRuntimeValueFactory { + + public func createString(value: String) throws -> ModelRuntimeValue { + throw ModelRuntimeValueError.unsupportedType(String(describing: String.self)) + } + + public func createTensor(value: ModelRuntimeTensorValue) throws -> ModelRuntimeValue { + guard let tensorValue = value.innerValue as? 
ExecutorchRuntimeTensorValue else { + throw ModelRuntimeValueError.invalidType( + String(describing: value.innerValue.self), + String(describing: ExecutorchRuntimeTensorValue.self) + ) + } + return ModelRuntimeValue(innerValue: ExecutorchRuntimeValue(tensor: tensorValue)) + } +} + +extension ExecutorchRuntimeValueSupport: ModelRuntimeTensorValueFactory { + + public func createFloatTensor(value: [Float], shape: [Int]) -> ModelRuntimeTensorValue { + ModelRuntimeTensorValue( + innerValue: ExecutorchRuntimeTensorValue( + floatArray: value.compactMap { NSNumber(value: $0) }, + shape: shape.compactMap { NSNumber(value: $0) } + ) + ) + } +} diff --git a/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/__tests__/ExecutorchRuntimeValueSupportTests.swift b/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/__tests__/ExecutorchRuntimeValueSupportTests.swift new file mode 100644 index 00000000000..474dc798a42 --- /dev/null +++ b/extension/apple/ExecutorchRuntimeValueSupport/ExecutorchRuntimeValueSupport/__tests__/ExecutorchRuntimeValueSupportTests.swift @@ -0,0 +1,42 @@ +// (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary. + +@testable import ExecutorchRuntimeValueSupport +import XCTest + +public extension String { + + /// Returns a random string. + /// This useful for testing when we want to ensure that production code + /// accidentally pass a test by using the same value as the test. + static func random() -> String { + UUID().uuidString + } +} + +public extension Float { + static func randomPositive() -> Float { + .random(in: 1...Float.greatestFiniteMagnitude) + } +} + +class ExecutorchRuntimeValueSupportTests: XCTestCase { + + func testTensorValue() throws { + let factory = ExecutorchRuntimeValueSupport(), + size = 100, + data = (1...size).map { _ in Float.randomPositive() }, + shape = [size] + + let sut = try XCTUnwrap(try? factory.createTensor(value: factory.createFloatTensor(value: data, shape: shape))) + + XCTAssertEqual(try? sut.tensorValue().floatRepresentation().floatArray, data) + XCTAssertEqual(try? sut.tensorValue().floatRepresentation().shape, shape) + } + + func testCreateStringsThrows() { + let factory = ExecutorchRuntimeValueSupport(), + value: String = .random() + + XCTAssertThrowsError(try factory.createString(value: value)) + } +} From 27bacff5371aa36833069a2f230568c8442d2b73 Mon Sep 17 00:00:00 2001 From: "Mengtao (Martin) Yuan" Date: Thu, 13 Mar 2025 16:20:12 -0700 Subject: [PATCH 15/19] Export Mimi model to ExecuTorch Differential Revision: D71039057 Pull Request resolved: https://github.com/pytorch/executorch/pull/8753 --- .../models/moshi/mimi/install_requirements.sh | 15 ++ examples/models/moshi/mimi/test_mimi.py | 156 ++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100755 examples/models/moshi/mimi/install_requirements.sh create mode 100644 examples/models/moshi/mimi/test_mimi.py diff --git a/examples/models/moshi/mimi/install_requirements.sh b/examples/models/moshi/mimi/install_requirements.sh new file mode 100755 index 00000000000..2de86466130 --- /dev/null +++ b/examples/models/moshi/mimi/install_requirements.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +set -x + +pip install -U moshi +pip install bitsandbytes +# Run llama2/install requirements for torchao deps +SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) + +bash "$SCRIPT_DIR"/../llama/install_requirements.sh diff --git a/examples/models/moshi/mimi/test_mimi.py b/examples/models/moshi/mimi/test_mimi.py new file mode 100644 index 00000000000..54b6b0d33ad --- /dev/null +++ b/examples/models/moshi/mimi/test_mimi.py @@ -0,0 +1,156 @@ +import io +import os +import random +import unittest + +import numpy as np +import requests +import torch +import torch.nn as nn +import torchaudio + +from huggingface_hub import hf_hub_download +from moshi.models import loaders +from torch.export import export, ExportedProgram + + +def read_mp3_from_url(url): + response = requests.get(url) + response.raise_for_status() # Ensure request is successful + audio_stream = io.BytesIO(response.content) + waveform, sample_rate = torchaudio.load(audio_stream, format="mp3") + return waveform.numpy(), sample_rate + + +class TestMimiModel(unittest.TestCase): + @classmethod + def setUpClass(cls): + """Setup once for all tests: Load model and prepare test data.""" + + # Get environment variables (if set), otherwise use default values + mimi_weight = os.getenv("MIMI_WEIGHT", None) + hf_repo = os.getenv("HF_REPO", loaders.DEFAULT_REPO) + device = "cuda" if torch.cuda.device_count() else "cpu" + + def seed_all(seed): + torch.manual_seed(seed) + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + random.seed(seed) + np.random.seed(seed) + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + + seed_all(42424242) + + if mimi_weight is None: + mimi_weight = hf_hub_download(hf_repo, loaders.MIMI_NAME) + cls.mimi = loaders.get_mimi(mimi_weight, device) + cls.device = device + cls.sample_pcm, cls.sample_sr = read_mp3_from_url( + "https://huggingface.co/lmz/moshi-swift/resolve/main/bria-24khz.mp3" + ) + + def test_mp3_loading(self): + """Ensure MP3 file loads correctly.""" + self.assertIsInstance(self.sample_pcm, np.ndarray) + self.assertGreater(self.sample_sr, 0) + + def test_encoding(self): + """Ensure encoding produces expected tensor shape.""" + pcm_chunk_size = int(self.mimi.sample_rate / self.mimi.frame_rate) + sample_pcm = torch.tensor(self.sample_pcm, device=self.device) + sample_pcm = sample_pcm[None] + chunk = sample_pcm[..., 0:pcm_chunk_size] + encoded = self.mimi.encode(chunk) + self.assertIsInstance(encoded, torch.Tensor) + self.assertGreater(encoded.shape[-1], 0) + + def test_decoding(self): + """Ensure decoding produces expected output.""" + pcm_chunk_size = int(self.mimi.sample_rate / self.mimi.frame_rate) + sample_pcm = torch.tensor(self.sample_pcm, device=self.device)[None] + chunk = sample_pcm[..., 0:pcm_chunk_size] + encoded = self.mimi.encode(chunk) + decoded = self.mimi.decode(encoded) + self.assertIsInstance(decoded, torch.Tensor) + + def test_streaming_encoding_decoding(self): + """Test streaming encoding and decoding consistency.""" + pcm_chunk_size = int(self.mimi.sample_rate / self.mimi.frame_rate) + sample_rate = self.mimi.sample_rate + max_duration_sec = 10.0 + max_duration_len = int(sample_rate * max_duration_sec) + + sample_pcm = torch.tensor(self.sample_pcm, device=self.device) + if sample_pcm.shape[-1] > max_duration_len: + sample_pcm = sample_pcm[..., :max_duration_len] + sample_pcm = sample_pcm[None].to(device=self.device) + + all_codes = [] + for start_idx in range(0, 
sample_pcm.shape[-1], pcm_chunk_size): + end_idx = min(sample_pcm.shape[-1], start_idx + pcm_chunk_size) + chunk = sample_pcm[..., start_idx:end_idx] + codes = self.mimi.encode(chunk) + if codes.shape[-1]: + all_codes.append(codes) + + all_codes_th = torch.cat(all_codes, dim=-1) + + all_pcms = [] + with self.mimi.streaming(1): + for i in range(all_codes_th.shape[-1]): + codes = all_codes_th[..., i : i + 1] + pcm = self.mimi.decode(codes) + all_pcms.append(pcm) + all_pcms = torch.cat(all_pcms, dim=-1) + + pcm_ref = self.mimi.decode(all_codes_th) + self.assertTrue(torch.allclose(pcm_ref, all_pcms, atol=1e-5)) + + def test_exported_decoding(self): + """Ensure exported decoding model is consistent with reference output.""" + + class MimiDecode(nn.Module): + def __init__(self, mimi: nn.Module): + super().__init__() + self.mimi_model = mimi + + def forward(self, x): + return self.mimi_model.decode(x) + + sample_pcm = torch.tensor(self.sample_pcm, device=self.device)[None] + pcm_chunk_size = int(self.mimi.sample_rate / self.mimi.frame_rate) + chunk = sample_pcm[..., 0:pcm_chunk_size] + input = self.mimi.encode(chunk) + + mimi_decode = MimiDecode(self.mimi) + ref_decode_output = mimi_decode(input) + exported_decode: ExportedProgram = export(mimi_decode, (input,), strict=False) + ep_decode_output = exported_decode.module()(input) + self.assertTrue(torch.allclose(ep_decode_output, ref_decode_output, atol=1e-6)) + + def test_exported_encoding(self): + """Ensure exported encoding model is consistent with reference output.""" + + class MimiEncode(nn.Module): + def __init__(self, mimi: nn.Module): + super().__init__() + self.mimi_model = mimi + + def forward(self, x): + return self.mimi_model.encode(x) + + mimi_encode = MimiEncode(self.mimi) + chunk = torch.tensor(self.sample_pcm, device=self.device)[None][ + ..., 0 : int(self.mimi.sample_rate / self.mimi.frame_rate) + ] + ref_encode_output = mimi_encode(chunk) + exported_encode = export(mimi_encode, (chunk,), strict=False) + ep_encode_output = exported_encode.module()(chunk) + self.assertTrue(torch.allclose(ep_encode_output, ref_encode_output, atol=1e-6)) + + +if __name__ == "__main__": + unittest.main() From 718aa6f8fe5844f5e5b7eb4c692f77485c2c170b Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 19:48:48 -0400 Subject: [PATCH 16/19] Reduce macOS CI jobs and add more Arm64 jobs. 
(#9228) Depends on https://github.com/pytorch/executorch/pull/9227/ and https://github.com/pytorch/executorch/pull/9207 Here's the net result after a sequence of PRs: - Reduce trunk test-model-macos jobs (from 38 down to 15) - Add arm64 trunk test-model jobs (from 0 up to 31) - Add arm64 pull test-model-jobs jobs (+4) --- .github/workflows/pull.yml | 16 ++++++++++++++-- .github/workflows/trunk.yml | 23 ++++++++++++----------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 91699d639f3..7cd0ae38565 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -49,11 +49,23 @@ jobs: model: [mv3, vit] backend: [portable, xnnpack-quantization-delegation] build-tool: [cmake, buck2] - runner: [linux.2xlarge] + runner: [linux.2xlarge, linux.arm64.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] + # Excluding specific runner + docker image combinations that don't make sense: + # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + exclude: + - runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + # TODO: Need to figure out why buck2 doesnt work on Graviton instances. + - runner: linux.arm64.2xlarge + build-tool: buck2 fail-fast: false with: runner: ${{ matrix.runner }} - docker-image: executorch-ubuntu-22.04-clang12 + docker-image: ${{ matrix.docker-image }} submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 90 diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 359b8c0f6d3..6dd9f34a9a3 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -23,8 +23,8 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: - model: [add, add_mul, emformer_join, emformer_transcribe, ic3, ic4, linear, llama2, mobilebert, mv2, mv3, resnet18, resnet50, vit, w2l] - backend: [portable, xnnpack-quantization-delegation] + model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l] + backend: [xnnpack-quantization-delegation] include: - model: efficient_sam backend: portable @@ -32,16 +32,12 @@ jobs: backend: portable - model: llama3_2_vision_encoder backend: portable - - model: lstm - backend: portable - - model: mul + - model: mv3 backend: portable - model: phi-4-mini backend: portable - model: qwen2_5 backend: portable - - model: softmax - backend: portable fail-fast: false with: runner: macos-m1-stable @@ -53,13 +49,12 @@ jobs: MODEL_NAME=${{ matrix.model }} BUILD_TOOL=cmake BACKEND=${{ matrix.backend }} - DEMO_BACKEND_DELEGATION=${{ matrix.demo_backend_delegation }} bash .ci/scripts/setup-conda.sh # Setup MacOS dependencies as there is no Docker support on MacOS atm PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" # Build and test executorch - PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" "${DEMO_BACKEND_DELEGATION}" + PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" test-models-linux-aarch64: name: test-models-linux-aarch64 @@ -71,10 +66,16 @@ jobs: matrix: model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, 
mobilebert, emformer_join, emformer_transcribe] backend: [portable, xnnpack-quantization-delegation] - runner: [linux.arm64.2xlarge] + include: + - model: lstm + backend: portable + - model: mul + backend: portable + - model: softmax + backend: portable fail-fast: false with: - runner: ${{ matrix.runner }} + runner: linux.arm64.2xlarge docker-image: executorch-ubuntu-22.04-gcc11-aarch64 submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} From 699ee7def80d571e9a37fc6913ba26e49252ded5 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 20:54:25 -0400 Subject: [PATCH 17/19] Add llama jobs on Arm64 and reduce llama jobs on MacOS (#9251) Reduce macos llama runners Add arm64 llama runners: distribute into pull.yml and trunk.yml jobs. --- .github/workflows/pull.yml | 23 +++++++++----- .github/workflows/trunk.yml | 62 ++++++++++++++++++++++++++++++++++--- 2 files changed, 74 insertions(+), 11 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 7cd0ae38565..75d1db2cd36 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -136,6 +136,7 @@ jobs: PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" test-llama-runner-linux: + # Test Both linux x86 and linux aarch64 name: test-llama-runner-linux uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main permissions: @@ -144,21 +145,29 @@ jobs: strategy: matrix: dtype: [fp32] - mode: [portable, xnnpack+custom, xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv] + mode: [xnnpack+custom+qe,xnnpack+custom+quantize_kv,xnnpack+quantize_kv] + runner: [linux.2xlarge, linux.arm64.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] include: - - dtype: bf16 - mode: portable - dtype: bf16 mode: custom + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + # Excluding specific runner + docker image combinations that don't make sense: + # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + exclude: + - runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 fail-fast: false with: - runner: linux.2xlarge - docker-image: executorch-ubuntu-22.04-clang12 + runner: ${{ matrix.runner }} + docker-image: ${{ matrix.docker-image }} submodules: 'true' ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} timeout: 900 - upload-artifact: android-models - upload-artifact-to-s3: true script: | # The generic Linux job chooses to use base env, not the one setup by the image CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 6dd9f34a9a3..6a472756774 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -283,18 +283,72 @@ jobs: # Test ANE llama ${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh - test-llama-runner-macos: - name: test-llama-runner-mac - uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + test-llama-runner-linux: + # Test Both linux x86 and linux aarch64 + name: test-llama-runner-linux + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + permissions: + id-token: write + contents: read strategy: matrix: dtype: [fp32] - mode: 
[portable, xnnpack+kv+custom, mps, coreml, xnnpack+custom+quantize_kv] + mode: [portable, xnnpack+custom] + runner: [linux.2xlarge, linux.arm64.2xlarge] + docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] include: - dtype: bf16 mode: portable + runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + - dtype: bf16 + mode: portable + runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 - dtype: bf16 mode: custom + runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + # Excluding specific runner + docker image combinations that don't make sense: + # - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) + # - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) + exclude: + - runner: linux.2xlarge + docker-image: executorch-ubuntu-22.04-gcc11-aarch64 + - runner: linux.arm64.2xlarge + docker-image: executorch-ubuntu-22.04-clang12 + fail-fast: false + with: + runner: ${{ matrix.runner }} + docker-image: ${{ matrix.docker-image }} + submodules: 'true' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 900 + script: | + # The generic Linux job chooses to use base env, not the one setup by the image + CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") + conda activate "${CONDA_ENV}" + + DTYPE=${{ matrix.dtype }} + BUILD_TOOL="cmake" + MODE=${{ matrix.mode }} + ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" + ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" + + # Setup executorch + PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" + # Install requirements for export_llama + PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh + # Test llama2 + PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" + + test-llama-runner-macos: + name: test-llama-runner-mac + uses: pytorch/test-infra/.github/workflows/macos_job.yml@main + strategy: + matrix: + dtype: [fp32] + mode: [mps, coreml, xnnpack+custom+quantize_kv] fail-fast: false with: runner: macos-m1-stable From 9a0c2db32d208079f53df4236480460c5336d857 Mon Sep 17 00:00:00 2001 From: Hansong <107070759+kirklandsign@users.noreply.github.com> Date: Thu, 13 Mar 2025 18:05:03 -0700 Subject: [PATCH 18/19] Split android instrumentation from build script Differential Revision: D71161609 Pull Request resolved: https://github.com/pytorch/executorch/pull/9249 --- .github/workflows/_android.yml | 1 + build/build_android_instrumentation.sh | 40 ++++++++++++++++++++++++++ build/build_android_library.sh | 10 ------- 3 files changed, 41 insertions(+), 10 deletions(-) create mode 100644 build/build_android_instrumentation.sh diff --git a/.github/workflows/_android.yml b/.github/workflows/_android.yml index 82e49d6672e..7061eb72aa3 100644 --- a/.github/workflows/_android.yml +++ b/.github/workflows/_android.yml @@ -30,6 +30,7 @@ jobs: # Build LLM Demo for Android bash build/build_android_library.sh ${ARTIFACTS_DIR_NAME} + bash build/build_android_instrumentation.sh # Running Android emulator directly on the runner and not using Docker run-emulator: diff --git a/build/build_android_instrumentation.sh b/build/build_android_instrumentation.sh new file mode 100644 index 00000000000..91bf03691b0 --- /dev/null +++ b/build/build_android_instrumentation.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# 
Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +set -ex + +if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then + PYTHON_EXECUTABLE=python3 +fi +which "${PYTHON_EXECUTABLE}" + +build_android_test() { + pushd extension/android_test + ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew testDebugUnitTest + ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest + popd +} + +collect_artifacts_to_be_uploaded() { + ARTIFACTS_DIR_NAME="$1" + # Collect Java library test + JAVA_LIBRARY_TEST_DIR="${ARTIFACTS_DIR_NAME}/library_test_dir" + mkdir -p "${JAVA_LIBRARY_TEST_DIR}" + cp extension/android_test/build/outputs/apk/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}" + cp extension/android_test/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}" +} + +main() { + build_android_test + if [ -n "$ARTIFACTS_DIR_NAME" ]; then + collect_artifacts_to_be_uploaded ${ARTIFACTS_DIR_NAME} + fi +} + +if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then + main "$@" +fi diff --git a/build/build_android_library.sh b/build/build_android_library.sh index 01ea86bf830..32b2210a54e 100644 --- a/build/build_android_library.sh +++ b/build/build_android_library.sh @@ -149,11 +149,6 @@ build_android_demo_apps() { pushd extension/benchmark/android/benchmark ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest popd - - pushd extension/android_test - ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew testDebugUnitTest - ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew build assembleAndroidTest - popd } collect_artifacts_to_be_uploaded() { @@ -172,11 +167,6 @@ collect_artifacts_to_be_uploaded() { mkdir -p "${MINIBENCH_APP_DIR}" cp extension/benchmark/android/benchmark/app/build/outputs/apk/debug/*.apk "${MINIBENCH_APP_DIR}" cp extension/benchmark/android/benchmark/app/build/outputs/apk/androidTest/debug/*.apk "${MINIBENCH_APP_DIR}" - # Collect Java library test - JAVA_LIBRARY_TEST_DIR="${ARTIFACTS_DIR_NAME}/library_test_dir" - mkdir -p "${JAVA_LIBRARY_TEST_DIR}" - cp extension/android_test/build/outputs/apk/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}" - cp extension/android_test/build/outputs/apk/androidTest/debug/*.apk "${JAVA_LIBRARY_TEST_DIR}" } main() { From 79015390f590fc8d05549635f93b09045012d004 Mon Sep 17 00:00:00 2001 From: Mergen Nachin Date: Thu, 13 Mar 2025 21:52:33 -0400 Subject: [PATCH 19/19] Move MacOS jobs (phi-4-mini, qwen2_5) etc to Arm64 (#9254) --- .github/workflows/trunk.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 6a472756774..e907e8215c9 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -23,6 +23,9 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main strategy: matrix: + # Mac runners are expensive and limited, and non reliable. + # Do some basic testing for macos jobs, and rely mostly on + # test-models-linux-aarch64 job instead. 
model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l] backend: [xnnpack-quantization-delegation] include: @@ -34,10 +37,6 @@ jobs: backend: portable - model: mv3 backend: portable - - model: phi-4-mini - backend: portable - - model: qwen2_5 - backend: portable fail-fast: false with: runner: macos-m1-stable @@ -73,6 +72,12 @@ jobs: backend: portable - model: softmax backend: portable + - model: phi-4-mini + backend: portable + - model: qwen2_5 + backend: portable + - model: llama3_2_vision_encoder + backend: portable fail-fast: false with: runner: linux.arm64.2xlarge