Commit d9b48d3

Merge branch 'main' into cmsis_main
2 parents d98fd5f + 010e68a commit d9b48d3

79 files changed: +1788 −391 lines changed
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-36e3dd54effb3f6d13d792029609292fdd5502bb
+40b02a2dc61bbf901a2df91719f47c98d65368ec

.ci/scripts/unittest-macos-cmake.sh

Lines changed: 0 additions & 1 deletion
@@ -11,4 +11,3 @@ ${CONDA_RUN} pytest -n auto --cov=./ --cov-report=xml
 # Run gtest
 LLVM_PROFDATA="xcrun llvm-profdata" LLVM_COV="xcrun llvm-cov" \
   ${CONDA_RUN} test/run_oss_cpp_tests.sh
-${CONDA_RUN} test/check_for_installed_private_headers_in_cmake_out.sh

.github/workflows/android-release-artifacts.yml

Lines changed: 15 additions & 2 deletions
@@ -16,6 +16,7 @@ on:
       options:
         - "xnnpack"
         - "vulkan+xnnpack"
+        - "qnn"
   schedule:
     - cron: 0 10 * * *

@@ -83,17 +84,29 @@ jobs:

          echo -n "$SECRET_EXECUTORCH_MAVEN_SIGNING_GPG_KEY_CONTENTS" | base64 -d > /tmp/secring.gpg

+         GRADLE_ARGS=""
+
          # Update the version name in build.gradle in case of maven publish
          VERSION="${{ inputs.version }}"
          if [ ! -z "$VERSION" ]; then
-           sed -i "s/\(coordinates(\"org.pytorch\", \"executorch-android\", \"\)\([0-9]\+.[0-9]\+.[0-9]\+\)\(\")\)/\1$VERSION\3/" extension/android/executorch_android/build.gradle
+           GRADLE_ARGS+=" -DexecuTorchVersion=${VERSION}"
          fi

          FLAVOR="${{ inputs.flavor }}"
          if [[ "$FLAVOR" == "vulkan+xnnpack" || -z "$FLAVOR" ]]; then
            export EXECUTORCH_BUILD_VULKAN=ON
          fi

+         if [[ "$FLAVOR" == "qnn" ]]; then
+           PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+           PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+           export EXECUTORCH_BUILD_QNN=ON
+           source backends/qualcomm/scripts/qnn_config.sh
+           export QNN_SDK_ROOT="/tmp/qnn/${QNN_VERSION}"
+           export ANDROID_ABIS=arm64-v8a
+           GRADLE_ARGS+=" -DqnnVersion=2.28.0"
+         fi
+
          # Build AAR Package
          mkdir aar-out
          export BUILD_AAR_DIR=aar-out

@@ -106,7 +119,7 @@ jobs:
          # Publish to maven staging
          UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
          if [[ "$UPLOAD_TO_MAVEN" == "true" ]]; then
-           (cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew :executorch_android:publishToMavenCentral)
+           (cd extension/android; ANDROID_HOME="${ANDROID_SDK:-/opt/android/sdk}" ./gradlew ${GRADLE_ARGS} :executorch_android:publishToMavenCentral)
          fi

  upload-release-aar:

.github/workflows/apple.yml

Lines changed: 1 addition & 0 deletions
@@ -156,6 +156,7 @@ jobs:
           "kernels_llm"
           "kernels_optimized"
           "kernels_quantized"
+          "kernels_torchao"
           "threadpool"
         )

.github/workflows/build-presets.yml

Lines changed: 37 additions & 0 deletions
@@ -103,3 +103,40 @@ jobs:
        ./install_requirements.sh > /dev/null
        cmake --preset ${{ matrix.preset }}
        cmake --build cmake-out -j$(( $(nproc) - 1 ))
+
+  windows:
+    uses: pytorch/test-infra/.github/workflows/windows_job.yml@main
+    strategy:
+      fail-fast: false
+      matrix:
+        preset: [pybind, windows]
+    with:
+      job-name: build
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      submodules: recursive
+      timeout: 90
+      script: |
+        set -eux
+        conda init powershell
+        powershell -Command "& {
+          Set-PSDebug -Trace 1
+          \$ErrorActionPreference = 'Stop'
+          \$PSNativeCommandUseErrorActionPreference = \$true
+
+          conda create --yes --quiet -n et python=3.12
+          conda activate et
+          python install_requirements.py
+
+          cmake --preset ${{ matrix.preset }} -T ClangCL
+          if (\$LASTEXITCODE -ne 0) {
+            Write-Host "CMake configuration was unsuccessful. Exit code: \$LASTEXITCODE."
+            exit \$LASTEXITCODE
+          }
+
+          \$numCores = [System.Environment]::GetEnvironmentVariable('NUMBER_OF_PROCESSORS') - 1
+          cmake --build cmake-out -j \$numCores
+          if (\$LASTEXITCODE -ne 0) {
+            Write-Host "CMake build was unsuccessful. Exit code: \$LASTEXITCODE."
+            exit \$LASTEXITCODE
+          }
+        }"

.github/workflows/trunk.yml

Lines changed: 39 additions & 0 deletions
@@ -940,3 +940,42 @@ jobs:
      build-mode: Release
      build-tool: cmake
      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+
+  test-mcu-models:
+    name: test-mcu-models
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    strategy:
+      matrix:
+        include:
+          - build-tool: cmake
+      fail-fast: false
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-arm-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        BUILD_TOOL=${{ matrix.build-tool }}
+
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Try to mirror these as closely as possible
+        source .ci/scripts/utils.sh
+        install_executorch "--use-pt-pinned-commit"
+
+        .ci/scripts/setup-arm-baremetal-tools.sh
+        source examples/arm/ethos-u-scratch/setup_path.sh
+
+        # Run selective Build
+        chmod +x examples/selective_build/test_selective_build.sh
+        examples/selective_build/test_selective_build.sh "${BUILD_TOOL}"
+
+        # Run MCU models
+        chmod +x examples/arm/run_mcu_models_fvp.sh
+        examples/arm/run_mcu_models_fvp.sh --target=cortex-m55

CMakeLists.txt

Lines changed: 0 additions & 9 deletions
@@ -485,29 +485,24 @@ install(
   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/core
   FILES_MATCHING
   PATTERN "*.h"
-  PATTERN "testing_util" EXCLUDE
 )
 install(
   DIRECTORY runtime/executor/
   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/executor
   FILES_MATCHING
   PATTERN "*.h"
-  PATTERN "test" EXCLUDE
-  PATTERN "platform_memory_allocator.h" EXCLUDE
 )
 install(
   DIRECTORY runtime/kernel/
   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/kernel
   FILES_MATCHING
   PATTERN "*.h"
-  PATTERN "test" EXCLUDE
 )
 install(
   DIRECTORY runtime/platform/
   DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/runtime/platform
   FILES_MATCHING
   PATTERN "*.h"
-  PATTERN "test" EXCLUDE
 )
 install(
   DIRECTORY extension/kernel_util/

@@ -592,15 +587,11 @@ endif()

 if(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/data_loader)
-  if(NOT WIN32)
-    set(data_loader_exclude_pattern "*mman_windows.h")
-  endif()
   install(
     DIRECTORY extension/data_loader/
     DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/executorch/extension/data_loader
     FILES_MATCHING
     PATTERN "*.h"
-    PATTERN ${data_loader_exclude_pattern} EXCLUDE
   )
   list(APPEND _executorch_extensions extension_data_loader)
 endif()

backends/apple/coreml/compiler/coreml_preprocess.py

Lines changed: 77 additions & 1 deletion
@@ -17,6 +17,10 @@
 import coremltools as ct
 import coremltools.optimize as cto
 from executorch.backends.apple.coreml import executorchcoreml
+from executorch.backends.apple.coreml.compiler.enumerated_shape_utils import (
+    _get_ct_inputs,
+    _SymbolicShapeToEnumeratedShapeMap,
+)
 from executorch.backends.apple.coreml.logging import get_coreml_log_level
 from executorch.exir.backend.backend_details import (
     BackendDetails,

@@ -37,6 +41,7 @@ class COMPILE_SPEC_KEYS(Enum):
     MIN_DEPLOYMENT_TARGET = "min_deployment_target"
     MODEL_COMPUTE_PRECISION = "model_compute_precision"
     OP_LINEAR_QUANTIZER_CONFIG = "op_linear_quantizer_config"
+    ENUMERATED_SHAPES = "enumerated_shapes"


 class MODEL_PATHS(Enum):
@@ -143,7 +148,7 @@ def generate_minimum_deployment_target_compile_spec(
     @staticmethod
     def min_deployment_target_from_compile_specs(
         compile_specs: List[CompileSpec],
-    ) -> ct.target:
+    ) -> Optional[ct.target]:
         """
         Returns the minimum deployment target by parsing the list of compile specs.
         """
@@ -214,6 +219,54 @@ def op_linear_quantizer_config_from_compile_specs(

         return None

+    @staticmethod
+    def generate_enumerated_shapes_compile_spec(
+        ep: ExportedProgram,
+        enumerated_shapes: Dict[str, List[List[int]]],
+    ) -> CompileSpec:
+        """
+        Returns the compile spec representing the model's enumerated shapes.
+        enumerated_shapes is a dictionary mapping each input to its enumerated shapes, e.g.,
+
+        enumerated_shapes = {
+            "x": [[1, 1, 24], [8, 9, 24]],
+            "y": [[1, 6], [30, 6]],
+        }
+
+        means the model can handle x with shape [1, 1, 24] or [8, 9, 24] and y with shape [1, 6] or [30, 6].
+
+        Multiple inputs can only have enumerated shapes when using iOS18 or later.
+        In this case, each input must have the same number of enumerated shapes, and these shapes are tied together
+        by their order in the list. For example, the model above can handle x with shape [1, 1, 24] and y with shape [1, 6],
+        or x with shape [8, 9, 24] and y with shape [30, 6], but not x with shape [1, 1, 24] and y with shape [30, 6].
+
+        Passing incorrect shapes at runtime will result in an error.
+        """
+        emap = _SymbolicShapeToEnumeratedShapeMap.from_exported_program(
+            ep,
+            enumerated_shapes,
+        )
+        str_representation = emap.to_json()
+        byte_representation = str_representation.encode("utf-8")
+        return CompileSpec(
+            COMPILE_SPEC_KEYS.ENUMERATED_SHAPES.value,
+            byte_representation,
+        )
+
+    @staticmethod
+    def enumerated_shapes_from_compile_specs(
+        compile_specs: List[CompileSpec],
+    ) -> Optional[_SymbolicShapeToEnumeratedShapeMap]:
+        """
+        Returns the model's enumerated shapes by parsing the list of compile specs.
+        """
+        for compile_spec in compile_specs:
+            if compile_spec.key == COMPILE_SPEC_KEYS.ENUMERATED_SHAPES.value:
+                emap_json = compile_spec.value.decode("utf-8")
+                emap = _SymbolicShapeToEnumeratedShapeMap.from_json(emap_json)
+                return emap
+        return None
+
     @staticmethod
     def generate_compile_specs(
         compute_unit: ct.ComputeUnit = ct.ComputeUnit.ALL,
@@ -446,6 +499,28 @@ def preprocess(
         op_linear_quantizer_config = (
             CoreMLBackend.op_linear_quantizer_config_from_compile_specs(compile_specs)
         )
+        enumerated_shapes = CoreMLBackend.enumerated_shapes_from_compile_specs(
+            compile_specs
+        )
+
+        # If using enumerated shapes, we need to pass the inputs to CoreML's convert() function
+        # explicitly
+        ct_inputs = None
+        if enumerated_shapes is not None:
+            ct_inputs = _get_ct_inputs(edge_program, enumerated_shapes)
+
+            # Check that there are not multiple enumerated inputs if iOS is below 18
+            if (minimum_deployment_target is None) or (
+                minimum_deployment_target < ct.target.iOS18
+            ):
+                n_enumerated_inputs = 0
+                for ct_in in ct_inputs:
+                    if isinstance(ct_in.shape, ct.EnumeratedShapes):
+                        n_enumerated_inputs += 1
+                if n_enumerated_inputs > 1:
+                    raise ValueError(
+                        f"Your program has {n_enumerated_inputs} enumerated inputs, but minimum_deployment_target is set to {minimum_deployment_target}. Multiple enumerated inputs require iOS18 or later."
+                    )

         # Load the model if MODEL_TYPE is 'COMPILED_MODEL'. This step is necessary because
         # get_compiled_model_path() requires a loaded model.
@@ -459,6 +534,7 @@ def preprocess(
             compute_precision=model_compute_precision,
             minimum_deployment_target=minimum_deployment_target,
             compute_units=compute_units,
+            inputs=ct_inputs,
         )

         if op_linear_quantizer_config is not None:
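
For orientation, here is a minimal export-side sketch of how the enumerated-shapes compile spec introduced above might be used. The toy module, the dynamic-dimension bounds, and the CoreMLPartitioner wiring are illustrative assumptions rather than part of this commit; only generate_enumerated_shapes_compile_spec, generate_compile_specs, and the shape-pairing semantics come from the diff.

# Hypothetical usage sketch (not part of this commit): lower a two-input model
# with paired enumerated shapes, mirroring the docstring example above.
import coremltools as ct
import torch
from executorch.backends.apple.coreml.compiler import CoreMLBackend
from executorch.backends.apple.coreml.partition import CoreMLPartitioner
from executorch.exir import to_edge_transform_and_lower
from torch.export import Dim, export


class SumTwo(torch.nn.Module):  # toy module, assumed for illustration
    def forward(self, x, y):
        return x.sum() + y.sum()


# Export with dynamic dims covering the enumerated ranges; sample inputs use the
# larger shapes to avoid 0/1 specialization during export.
ep = export(
    SumTwo(),
    (torch.randn(8, 9, 24), torch.randn(30, 6)),
    dynamic_shapes={
        "x": {0: Dim("x0", min=1, max=8), 1: Dim("x1", min=1, max=9)},
        "y": {0: Dim("y0", min=1, max=30)},
    },
)

# Shapes are tied by position: (x=[1, 1, 24], y=[1, 6]) or (x=[8, 9, 24], y=[30, 6]).
enumerated_shapes = {
    "x": [[1, 1, 24], [8, 9, 24]],
    "y": [[1, 6], [30, 6]],
}

# Multiple enumerated inputs require iOS18 or later, per the check in preprocess().
compile_specs = CoreMLBackend.generate_compile_specs(
    minimum_deployment_target=ct.target.iOS18
)
compile_specs.append(
    CoreMLBackend.generate_enumerated_shapes_compile_spec(ep, enumerated_shapes)
)

lowered = to_edge_transform_and_lower(
    ep, partitioner=[CoreMLPartitioner(compile_specs=compile_specs)]
)

At runtime, inputs must then match one of the enumerated shape combinations exactly; anything else results in an error, as the docstring notes.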
