pytorch
diff --git a/.Package.swift/kernels_torchao/dummy.swift b/.Package.swift/kernels_torchao/dummy.swift
diff --git a/.Package.swift/kernels_torchao_debug/dummy.swift b/.Package.swift/kernels_torchao_debug/dummy.swift
diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh
Lines changed: 5 additions & 10 deletions
diff --git a/.ci/scripts/zephyr-utils.sh b/.ci/scripts/zephyr-utils.sh
Lines changed: 3 additions & 3 deletions
diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml
Lines changed: 2 additions & 2 deletions
diff --git a/.gitmodules b/.gitmodules
Lines changed: 0 additions & 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
Lines changed: 63 additions & 25 deletions
diff --git a/CMakePresets.json b/CMakePresets.json
Lines changed: 30 additions & 0 deletions
diff --git a/Package.swift b/Package.swift
Lines changed: 5 additions & 0 deletions
diff --git a/backends/apple/coreml/TARGETS b/backends/apple/coreml/TARGETS
Lines changed: 0 additions & 1 deletion
@@ -29,27 +29,22 @@ cmake -DPYTHON_EXECUTABLE=python \
     -DEXECUTORCH_ENABLE_LOGGING=1 \
     -DCMAKE_BUILD_TYPE=Release \
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
     -DEXECUTORCH_BUILD_XNNPACK=OFF \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
     -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
+    -DEXECUTORCH_BUILD_KERNELS_TORCHAO=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM_RUNNER=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_LLM=ON \
     -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
     -Bcmake-out .
-cmake --build cmake-out -j16 --target install --config Release
+cmake --build cmake-out -j16 --config Release --target install
 
 # Install llama runner with torchao
 cmake -DPYTHON_EXECUTABLE=python \
-    -DBUILD_TESTING=OFF \
     -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_KERNELS_LLM=ON \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=OFF \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_TORCHAO=ON \
     -Bcmake-out/examples/models/llama \
     examples/models/llama
 cmake --build cmake-out/examples/models/llama -j16 --config Release
 
@@ -6,9 +6,9 @@
 # LICENSE file in the root directory of this source tree.
 
 download_arm_zephyr_sdk () {
-    wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.16.0/zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-    tar -xf zephyr-sdk-0.16.0_linux-x86_64.tar.xz
-    rm -f zephyr-sdk-0.16.0_linux-x86_64.tar.xz
+    wget https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+    tar -xf zephyr-sdk-0.17.2_linux-x86_64.tar.xz
+    rm -f zephyr-sdk-0.17.2_linux-x86_64.tar.xz
 }
 
 setup_zephyr_et_module () {
 
@@ -92,7 +92,7 @@ jobs:
 
         # TODO @Bujji: Should see if this can be moved into the docker image itself
         download_arm_zephyr_sdk
-        ./zephyr-sdk-0.16.0/setup.sh -c -t arm-zephyr-eabi
+        ./zephyr-sdk-0.17.2/setup.sh -c -t arm-zephyr-eabi
         cd $ZEPHYR_PROJ_ROOT
         setup_zephyr_et_module
 
@@ -485,7 +485,7 @@ jobs:
         eval "$(conda shell.bash hook)"
 
         # Install requirements
-        ${CONDA_RUN} EXECUTORCH_BUILD_TORCHAO=1 python install_executorch.py
+        ${CONDA_RUN} EXECUTORCH_BUILD_KERNELS_TORCHAO=1 python install_executorch.py
         ${CONDA_RUN} sh examples/models/llama/install_requirements.sh
 
         # Run test
 
@@ -1,9 +1,6 @@
 [submodule "backends/arm/third-party/ethos-u-core-driver"]
 	path = backends/arm/third-party/ethos-u-core-driver
 	url = https://git.gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-core-driver.git
-[submodule "backends/arm/third-party/serialization_lib"]
-	path = backends/arm/third-party/serialization_lib
-	url = https://git.gitlab.arm.com/tosa/tosa-serialization.git
 [submodule "backends/vulkan/third-party/Vulkan-Headers"]
 	path = backends/vulkan/third-party/Vulkan-Headers
 	url = https://github.com/KhronosGroup/Vulkan-Headers
 
@@ -50,7 +50,10 @@
 cmake_minimum_required(VERSION 3.29)
 project(executorch)
 
+set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
+
 include(${PROJECT_SOURCE_DIR}/tools/cmake/common/preset.cmake)
+include(${PROJECT_SOURCE_DIR}/tools/cmake/Codegen.cmake)
 include(${PROJECT_SOURCE_DIR}/tools/cmake/Utils.cmake)
 include(CMakeDependentOption)
 include(ExternalProject)
@@ -123,8 +126,6 @@ set(CMAKE_INSTALL_RPATH_USE_LINK_PATH ON)
 # Instead please use `find_package(executorch REQUIRED)` in the example
 # directory and add a new executable in the example `CMakeLists.txt`.
 
-set(EXECUTORCH_ROOT ${CMAKE_CURRENT_SOURCE_DIR})
-
 if(NOT EXECUTORCH_ENABLE_LOGGING)
   # Avoid pulling in the logging strings, which can be large. Note that this
   # will set the compiler flag for all targets in this directory, and for all
@@ -278,29 +279,6 @@ if(EXECUTORCH_BUILD_PTHREADPOOL)
   )
 endif()
 
-if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
-  set(TORCHAO_BUILD_ATEN_OPS OFF)
-  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
-  set(TORCHAO_BUILD_CPU_AARCH64 ON)
-  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
-
-  list(
-    APPEND
-    TORCHAO_INCLUDE_DIRS
-    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
-    ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
-    ${EXECUTORCH_ROOT}/third-party/ao
-  )
-
-  set(EXECUTORCH_INCLUDE_DIRS ${TORCHAO_INCLUDE_DIRS})
-
-  add_subdirectory(
-    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
-  )
-  executorch_target_link_options_shared_lib(torchao_ops_executorch)
-  list(APPEND _executorch_kernels torchao_ops_executorch)
-endif()
-
 if(EXECUTORCH_BUILD_TESTS)
   set(EXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR ON)
   include(CTest)
@@ -343,6 +321,7 @@ if(NOT EXECUTORCH_SRCS_FILE)
   message(STATUS "executorch: Generating source lists")
   set(EXECUTORCH_SRCS_FILE "${CMAKE_CURRENT_BINARY_DIR}/executorch_srcs.cmake")
   extract_sources(${EXECUTORCH_SRCS_FILE})
+  executorch_validate_build_variables()
 endif()
 
 # This file defines the `_<target>__srcs` variables used below.
@@ -705,6 +684,65 @@ if(EXECUTORCH_BUILD_PTHREADPOOL AND EXECUTORCH_BUILD_CPUINFO)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/threadpool)
 endif()
 
+if(EXECUTORCH_BUILD_KERNELS_TORCHAO)
+  if(NOT TARGET cpuinfo)
+    message(
+      FATAL_ERROR
+        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_CPUINFO be set ON"
+    )
+  endif()
+  if(NOT TARGET pthreadpool)
+    message(
+      FATAL_ERROR
+        "EXECUTORCH_BUILD_KERNELS_TORCHAO requires EXECUTORCH_BUILD_PTHREADPOOL be set ON"
+    )
+  endif()
+
+  # Configure TorchAO kernels: set the TORCHAO_* switches before the add_subdirectory() call below
+  set(TORCHAO_BUILD_ATEN_OPS OFF)
+  set(TORCHAO_BUILD_EXECUTORCH_OPS ON)
+  set(TORCHAO_BUILD_CPU_AARCH64 ON)
+  set(TORCHAO_ENABLE_ARM_NEON_DOT ON)
+  set(TORCHAO_BUILD_KLEIDIAI ON)
+
+  # TorchAO kernels look for EXECUTORCH_INCLUDE_DIRS; fail if it is already defined, set it only around add_subdirectory()
+  if(DEFINED EXECUTORCH_INCLUDE_DIRS)
+    message(FATAL_ERROR "EXECUTORCH_INCLUDE_DIRS is already defined")
+  endif()
+  set(EXECUTORCH_INCLUDE_DIRS
+      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/pthreadpool/include
+      ${EXECUTORCH_ROOT}/backends/xnnpack/third-party/cpuinfo/include
+  )
+  add_subdirectory(
+    ${CMAKE_CURRENT_SOURCE_DIR}/third-party/ao/torchao/experimental
+  )
+  unset(EXECUTORCH_INCLUDE_DIRS)
+
+  executorch_target_link_options_shared_lib(torchao_ops_executorch)
+  list(APPEND _executorch_kernels torchao_ops_executorch)
+
+  install(
+    TARGETS torchao_ops_executorch torchao_kernels_aarch64
+    EXPORT ExecuTorchTargets
+    DESTINATION lib
+    INCLUDES
+    DESTINATION ${_common_include_directories}
+  )
+  # Install kleidiai here only when TorchAO uses it and the XNNPACK build (with KleidiAI enabled) is not expected to install it
+  if(TORCHAO_BUILD_KLEIDIAI AND NOT (EXECUTORCH_BUILD_XNNPACK
+                                     AND EXECUTORCH_XNNPACK_ENABLE_KLEIDI)
+  )
+    install(
+      TARGETS kleidiai
+      EXPORT ExecuTorchTargets
+      DESTINATION lib
+      INCLUDES
+      DESTINATION ${_common_include_directories}
+    )
+  endif()
+
+endif()
+
 if(EXECUTORCH_BUILD_PYBIND)
 
   # Add codegen tools subdirectory for selective_build pybind module
 
@@ -6,6 +6,36 @@
       "hidden": true,
       "binaryDir": "${sourceDir}/cmake-out"
     },
+    {
+      "name": "android-arm64-v8a",
+      "displayName": "Build executorch core and JNI bindings on android arm64-v8a",
+      "inherits": ["common"],
+      "binaryDir": "${sourceDir}/cmake-out-android-arm64-v8a",
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+        "ANDROID_ABI": "arm64-v8a"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": ["Darwin", "Linux", "Windows"]
+      }
+    },
+    {
+      "name": "android-x86_64",
+      "displayName": "Build executorch core and JNI bindings on android x86_64",
+      "inherits": ["common"],
+      "binaryDir": "${sourceDir}/cmake-out-android-x86_64",
+      "cacheVariables": {
+        "EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/android.cmake",
+        "ANDROID_ABI": "x86_64"
+      },
+      "condition": {
+        "type": "inList",
+        "string": "${hostSystemName}",
+        "list": ["Darwin", "Linux", "Windows"]
+      }
+    },
     {
       "name": "macos",
       "displayName": "Build ExecuTorch for macOS",
 
@@ -84,6 +84,11 @@ let products = deliverables([
     ],
   ],
   "kernels_quantized": [:],
+  "kernels_torchao": [
+    "targets": [
+      "threadpool",
+    ],
+  ],
 ])
 
 let targets = deliverables([
 
@@ -120,7 +120,6 @@ runtime.python_test(
         "test/*.py",
     ]),
     deps = [
-        "fbsource//third-party/pypi/coremltools:coremltools",
         "fbsource//third-party/pypi/pytest:pytest",
         ":partitioner",
         ":quantizer",