pytorch
diff --git a/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion b/‎.ci/docker/ci_commit_pins/pytorch.txt‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.ci/scripts/check_c10_sync.sh‎
Lines changed: 1 addition & 1 deletion b/‎.ci/scripts/check_c10_sync.sh‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/lint.yml‎
Lines changed: 6 additions & 1 deletion b/‎.github/workflows/lint.yml‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎.github/workflows/trunk.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/trunk.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 5 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎backends/apple/coreml/test/tester.py‎
Lines changed: 61 additions & 0 deletions b/‎backends/apple/coreml/test/tester.py‎
Lines changed: 61 additions & 0 deletions
diff --git a/‎backends/arm/_passes/TARGETS‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/TARGETS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/_passes/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 4 additions & 0 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py‎
Lines changed: 78 additions & 0 deletions b/‎backends/arm/_passes/decorate_fp32_to_int32_casting_pass.py‎
Lines changed: 78 additions & 0 deletions
@@ -1 +1 @@
-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f
+7cda4017ddda554752e89069ae205be5e8388f59
@@ -12,4 +12,4 @@ pushd pytorch
 git checkout "$pytorch_pin"
 popd
 "$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/c10 pytorch/c10
-"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/standalone pytorch/torch/standalone
+"$(dirname "${BASH_SOURCE[0]}")"/compare_dirs.sh runtime/core/portable_type/c10/torch/headeronly pytorch/torch/headeronly
@@ -83,8 +83,13 @@ jobs:
       script: |
         FILES_NEEDS_FORMAT=$(/opt/google-java-format -n \
           extension/android/executorch_android/src/main/java/org/pytorch/executorch/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/extension/llm/*.java \
+          extension/android/executorch_android/src/main/java/org/pytorch/executorch/annotations/*.java \
+          extension/android/executorch_android/src/androidTest/java/org/pytorch/executorch/*.java \
           examples/demo-apps/android/LlamaDemo/app/src/main/java/com/example/executorchllamademo/*.java \
-          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java)
+          examples/demo-apps/android/LlamaDemo/app/src/androidTest/java/com/example/executorchllamademo/*.java \
+          extension/benchmark/android/benchmark/app/src/main/java/org/pytorch/minibench/*.java \
+          extension/benchmark/android/benchmark/app/src/androidTest/java/org/pytorch/minibench/*.java)
         if [ -n "$FILES_NEEDS_FORMAT" ]; then
           echo "Warning: The following files need formatting. Please use google-java-format."
           echo "Use a binary from https://github.com/google/google-java-format/releases/"
 
@@ -240,11 +240,11 @@ jobs:
 
         cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
         setup_script_args=""
-        if [[ ${{ matrix.os}} == "bare_metal" ]]; then 
+        if [[ ${{ matrix.os}} == "bare_metal" ]]; then
           toolchain_prefix=arm-none-eabi-
-          threshold="103268" # ~100KiB
+          threshold="104000" # should be ~103.7KB, set threshold to 104KB.
           toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
-        elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then 
+        elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
           setup_script_args="--target-toolchain zephyr"
           toolchain_prefix=arm-zephyr-eabi-
           threshold="133120" # should be ~125KB, set threshold to 130KB
 
@@ -490,7 +490,7 @@ install(
   INCLUDES
   DESTINATION ${_common_include_directories}
 )
-install(FILES tools/cmake/executorch-config.cmake
+install(FILES tools/cmake/Utils.cmake tools/cmake/executorch-config.cmake
         DESTINATION lib/cmake/ExecuTorch
 )
 
@@ -732,4 +732,8 @@ if(EXECUTORCH_BUILD_VULKAN)
   add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/vulkan)
 endif()
 
+if(EXECUTORCH_BUILD_ANDROID_JNI)
+  add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extension/android)
+endif()
+
 include(Test.cmake)
@@ -0,0 +1,61 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Any, List, Optional, Tuple
+
+import executorch
+import executorch.backends.test.harness.stages as BaseStages
+
+import torch
+from executorch.backends.apple.coreml.partition import CoreMLPartitioner
+from executorch.backends.test.harness import Tester as TesterBase
+from executorch.backends.test.harness.stages import StageType
+from executorch.exir import EdgeCompileConfig
+from executorch.exir.backend.partitioner import Partitioner
+
+
+class Partition(BaseStages.Partition):
+    """Partition stage that falls back to the Core ML partitioner.
+
+    Args:
+        partitioner: Partitioner instance to hand to the base stage. When it
+            is omitted (or falsy), a default-constructed ``CoreMLPartitioner``
+            is used instead.
+    """
+
+    def __init__(self, partitioner: Optional[Partitioner] = None):
+        # The parameter is typed as a Partitioner *instance*, so the fallback
+        # must be an instance as well rather than the CoreMLPartitioner class
+        # object. `or` short-circuits, so the default is only constructed when
+        # the caller did not supply a partitioner.
+        super().__init__(
+            partitioner=partitioner or CoreMLPartitioner(),
+        )
+
+
+class ToEdgeTransformAndLower(BaseStages.ToEdgeTransformAndLower):
+    """To-edge/transform/lower stage preconfigured for Core ML.
+
+    Args:
+        partitioners: Optional list of partitioners, forwarded unchanged to
+            the base stage.
+        edge_compile_config: Optional edge compile configuration, forwarded
+            unchanged to the base stage.
+    """
+
+    def __init__(
+        self,
+        partitioners: Optional[List[Partitioner]] = None,
+        edge_compile_config: Optional[EdgeCompileConfig] = None,
+    ):
+        # Caller-supplied values pass straight through; the only thing this
+        # subclass pins is CoreMLPartitioner as the default partitioner class.
+        super().__init__(
+            partitioners=partitioners,
+            edge_compile_config=edge_compile_config,
+            default_partitioner_cls=CoreMLPartitioner,
+        )
+
+
+class CoreMLTester(TesterBase):
+    """Harness ``Tester`` wired to this module's Core ML stage classes.
+
+    Starts from the harness's default stage classes and replaces the
+    ``PARTITION`` and ``TO_EDGE_TRANSFORM_AND_LOWER`` entries with the
+    ``Partition`` and ``ToEdgeTransformAndLower`` classes defined here.
+
+    Args:
+        module: ``torch.nn.Module`` forwarded to the base tester.
+        example_inputs: Example input tensors forwarded to the base tester.
+        dynamic_shapes: Optional dynamic-shape specification forwarded to the
+            base tester.
+    """
+
+    def __init__(
+        self,
+        module: torch.nn.Module,
+        example_inputs: Tuple[torch.Tensor],
+        dynamic_shapes: Optional[Tuple[Any]] = None,
+    ):
+        # Specialize for Core ML: keep the harness defaults and override only
+        # the backend-specific stages.
+        harness_defaults = TesterBase.default_stage_classes()
+        coreml_overrides = {
+            StageType.PARTITION: Partition,
+            StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
+        }
+        stage_classes = harness_defaults | coreml_overrides
+
+        super().__init__(
+            module=module,
+            stage_classes=stage_classes,
+            example_inputs=example_inputs,
+            dynamic_shapes=dynamic_shapes,
+        )
@@ -6,6 +6,7 @@ python_library(
     deps = [
         "//executorch/backends/arm:tosa_quant_utils",
         "//executorch/backends/arm:tosa_utils",
+        "//executorch/backends/arm/tosa/dialect:lib",
         "//executorch/backends/transforms:fuse_view_copy",
         "//executorch/backends/transforms:remove_getitem_op",
         "//executorch/backends/transforms:replace_scalar_with_tensor",
 
@@ -51,6 +51,7 @@
 from .decompose_sqrt_pass import DecomposeSqrtPass  # noqa
 from .decompose_sum_pass import DecomposeSumPass  # noqa
 from .decompose_var_pass import DecomposeVarPass  # noqa
+from .decorate_fp32_to_int32_casting_pass import DecorateFp32toInt32CastingPass  # noqa
 from .fold_qdq_with_annotated_qparams_pass import (  # noqa
     FoldAndAnnotateQParamsPass,
     QuantizeOperatorArguments,
 
@@ -56,6 +56,7 @@
     DecomposeSqrtPass,
     DecomposeSumPass,
     DecomposeVarPass,
+    DecorateFp32toInt32CastingPass,
     FoldAndAnnotateQParamsPass,
     FuseBatchnorm2DPass,
     FuseConstantArgsPass,
@@ -200,6 +201,9 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(MatchArgRanksPass(exported_program))
         self.add_pass(DecomposeAdaptiveAvgPool2dPass())
         self.add_pass(DecomposeAvgPool2d())
+        self.add_pass(
+            DecorateFp32toInt32CastingPass()
+        )  # Require that no new fp32->int32 cast is introduced after this pass
         self.add_pass(ComputeConstantOpsAOT(exported_program))
 
         self.add_pass(DecomposeGroupedConv())
 
@@ -0,0 +1,78 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+
+import torch
+from executorch.backends.arm._passes import ArmPass
+from executorch.backends.arm._passes.arm_pass_utils import get_node_arg
+from executorch.exir.dialects._ops import ops as exir_ops
+
+
+def _get_decorated_ops(op):
+    """Return the edge operators used to decorate an fp32 -> int32 cast.
+
+    Args:
+        op: Operator being rewritten; must be one of
+            ``DecorateFp32toInt32CastingPass.targets``.
+
+    Returns:
+        A 5-tuple of edge-dialect operators in the order
+        ``(full, ge, floor, ceil, where)``.
+
+    Raises:
+        RuntimeError: If ``op`` is not one of the targeted cast operators.
+    """
+    # Reject anything the pass does not target before handing out operators.
+    if op not in DecorateFp32toInt32CastingPass.targets:
+        raise RuntimeError(f"Can't get decorated ops for op {op}")
+
+    return (
+        exir_ops.edge.aten.full.default,
+        exir_ops.edge.aten.ge.Tensor,
+        exir_ops.edge.aten.floor.default,
+        exir_ops.edge.aten.ceil.default,
+        exir_ops.edge.aten.where.self,
+    )
+
+
+class DecorateFp32toInt32CastingPass(ArmPass):
+    """
+    Decorate pytorch fp32 -> int32 casts so they can be lowered to TOSA.
+
+    The value being cast is first rounded toward zero with Floor, Ceil and
+    Where (floor for non-negative elements, ceil for negative ones) and only
+    then passed to the original cast operator.
+    Before:
+        output = to_copy(x, dtype=torch.int32)
+    After:
+        %zero = full((1, ..., 1), 0.0, dtype=torch.float32)  # one 1 per output dim
+        is_non_negative = x >= %zero
+        floor_x = floor(x)
+        ceil_x = ceil(x)
+        decorated_x = where(is_non_negative, floor_x, ceil_x)
+        output = to_copy(decorated_x, dtype=torch.int32)
+    """
+
+    # Cast operators this pass inspects; every other operator is passed through.
+    targets = [
+        exir_ops.edge.aten._to_copy.default,
+        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
+    ]
+
+    def call_operator(self, op, args, kwargs, meta):
+        """Rewrite a targeted fp32 -> int32 cast; forward everything else.
+
+        Operators outside ``targets``, and targeted casts whose input/output
+        dtypes are not exactly float32/int32, are delegated unchanged to the
+        parent implementation.
+        """
+        parent = super()
+
+        if op not in self.targets:
+            return parent.call_operator(op, args, kwargs, meta)
+
+        x = get_node_arg(args, 0)
+        src_dtype = x.node.meta["val"].dtype
+        dst_dtype = meta["val"].dtype
+        if src_dtype != torch.float32 or dst_dtype != torch.int32:
+            return parent.call_operator(op, args, kwargs, meta)
+
+        full_op, ge_op, floor_op, ceil_op, where_op = _get_decorated_ops(op)
+
+        # Zero constant shaped as all ones with the same rank as the cast output.
+        ones_shape = (1,) * len(meta["val"].size())
+        zero = parent.call_operator(
+            full_op,
+            args=(ones_shape, 0.0),
+            kwargs={"dtype": torch.float32},
+            meta=meta,
+            updated=True,
+        )
+
+        # Every inserted operator reuses the cast's own `meta`.
+        non_negative = parent.call_operator(ge_op, (x, zero), {}, meta, updated=True)
+        floored = parent.call_operator(floor_op, (x,), {}, meta, updated=True)
+        ceiled = parent.call_operator(ceil_op, (x,), {}, meta, updated=True)
+        toward_zero = parent.call_operator(
+            where_op, (non_negative, floored, ceiled), {}, meta, updated=True
+        )
+
+        # Re-issue the original cast on the decorated value, keeping its kwargs.
+        return parent.call_operator(op, (toward_zero,), kwargs, meta, updated=True)
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-9b498d3bb28b8e3411ce464dd2755c5b96d92c8f`
	`1`	`+7cda4017ddda554752e89069ae205be5e8388f59`