Skip to content

Commit 534973e

Browse files
author
morelos
committed
Update on "[ET-VK][ez][Ops] registering Q/DQ/CQP ops and specifying optimal storage"
# Context Certain quantization operators need scales and zeros to be set with a storage layout as buffers. Since the existing op_registry does not allow specifying the memory or storage layout of input parameters, we need to specify that the optimal storage type is buffer so that a conversion pass is added to ensure that the inputs are also buffers. # Changes This moves the quantized_decomposed operators into their own registration, while also specifying that buffer is preferred. Differential Revision: [D77746131](https://our.internmc.facebook.com/intern/diff/D77746131/) [ghstack-poisoned]
2 parents 734e1f8 + 50325b3 commit 534973e

File tree

65 files changed

+3899
-292
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

65 files changed

+3899
-292
lines changed

.ci/scripts/setup-arm-baremetal-tools.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@
88
# Setup arm example environment (including TOSA tools)
99
git config --global user.email "[email protected]"
1010
git config --global user.name "Github Executorch"
11-
bash examples/arm/setup.sh --i-agree-to-the-contained-eula
11+
bash examples/arm/setup.sh --i-agree-to-the-contained-eula ${@:-}

.github/workflows/build-presets.yml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,45 @@ jobs:
3434
${CONDA_RUN} cmake --preset ${{ matrix.preset }}
3535
${CONDA_RUN} cmake --build cmake-out -j$(( $(sysctl -n hw.ncpu) - 1 ))
3636
37+
zephyr:
38+
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
39+
strategy:
40+
fail-fast: false
41+
matrix:
42+
preset: [zephyr]
43+
with:
44+
job-name: build
45+
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
46+
runner: linux.2xlarge
47+
docker-image: executorch-ubuntu-22.04-arm-sdk
48+
submodules: recursive
49+
timeout: 90
50+
script: |
51+
set -eux
52+
# The generic Linux job chooses to use base env, not the one setup by the image
53+
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
54+
conda activate "${CONDA_ENV}"
55+
56+
./install_requirements.sh > /dev/null
57+
58+
# Download toolchain
59+
toolchain_url="https://github.com/zephyrproject-rtos/sdk-ng/releases/download/v0.17.2/toolchain_linux-x86_64_arm-zephyr-eabi.tar.xz"
60+
toolchain_dir="arm-zephyr-eabi"
61+
curl --output "${toolchain_dir}.tar.xz" -L "${toolchain_url}"
62+
63+
# Verify download
64+
echo "93128be0235cf5cf5f1ee561aa6eac5f ${toolchain_dir}.tar.xz" > arm-zephyr-eabi.md5
65+
md5sum -c --strict arm-zephyr-eabi.md5
66+
67+
# Extract and install to PATH
68+
tar xf "${toolchain_dir}.tar.xz"
69+
rm -f "${toolchain_dir}.tar.xz"
70+
toolchain_bin_path="$(cd ${toolchain_dir}/bin && pwd)"
71+
export PATH=$PATH:${toolchain_bin_path}
72+
73+
# Build Arm Zephyr Preset
74+
cmake --preset ${{ matrix.preset }}
75+
cmake --build cmake-out -j$(( $(nproc) - 1 ))
3776
linux:
3877
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
3978
strategy:

.github/workflows/trunk.yml

Lines changed: 43 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,10 @@ jobs:
223223
permissions:
224224
id-token: write
225225
contents: read
226+
strategy:
227+
matrix:
228+
os: [bare_metal, zephyr-preset]
229+
fail-fast: false
226230
with:
227231
runner: linux.2xlarge
228232
docker-image: executorch-ubuntu-22.04-arm-sdk
@@ -234,35 +238,62 @@ jobs:
234238
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
235239
conda activate "${CONDA_ENV}"
236240
241+
cxx_flags="-fno-exceptions -fno-rtti -Wall -Werror -Wno-int-in-bool-context -DET_HAVE_PREAD=0"
242+
setup_script_args=""
243+
if [[ ${{ matrix.os}} == "bare_metal" ]]; then
244+
toolchain_prefix=arm-none-eabi-
245+
threshold="103268" # ~100KiB
246+
toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
247+
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
248+
setup_script_args="--target-toolchain zephyr"
249+
toolchain_prefix=arm-zephyr-eabi-
250+
threshold="133120" # should be ~125KB, set threshold to 130KB
251+
toolchain_cmake=examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake
252+
else
253+
echo "Fail unsupport OS selection ${{ matrix.os }}"
254+
exit 1
255+
fi
256+
237257
source .ci/scripts/utils.sh
238258
install_executorch "--use-pt-pinned-commit"
239-
.ci/scripts/setup-arm-baremetal-tools.sh
259+
.ci/scripts/setup-arm-baremetal-tools.sh ${setup_script_args}
240260
source examples/arm/ethos-u-scratch/setup_path.sh
241261
242-
# User baremetal toolchain
243-
arm-none-eabi-c++ --version
244-
toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake
262+
# User toolchain
263+
${toolchain_prefix}c++ --version
264+
265+
# Setup cmake target to desired toolchain
245266
toolchain_cmake=$(realpath ${toolchain_cmake})
246267
247-
# Build and test size test
248-
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
268+
# Build and run size test
269+
if [[ ${{ matrix.os}} == "bare_metal" ]]; then
270+
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
271+
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
272+
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
273+
cmake --build cmake-out -j9 --target install --config Release
274+
CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
275+
cmake --build cmake-out/test -j9 --config Release
276+
else
277+
echo "Fail unsupport OS selection ${{ matrix.os }}"
278+
exit 1
279+
fi
280+
249281
elf="cmake-out/test/size_test"
250282
251283
# Dump basic info
252284
ls -al ${elf}
253-
arm-none-eabi-size ${elf}
285+
${toolchain_prefix}size ${elf}
254286
255-
# Dump symbols
287+
# Dump symbol
256288
python .github/scripts/run_nm.py -e ${elf}
257-
python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "arm-none-eabi-"
258-
python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "arm-none-eabi-"
289+
python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "${toolchain_prefix}"
290+
python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "${toolchain_prefix}"
259291
260292
# Add basic guard - TODO: refine this!
261-
arm-none-eabi-strip ${elf}
293+
${toolchain_prefix}strip ${elf}
262294
output=$(ls -la ${elf})
263295
arr=($output)
264296
size=${arr[4]}
265-
threshold="103268" # ~100KiB
266297
echo "size: $size, threshold: $threshold"
267298
if [[ "$size" -le "$threshold" ]]; then
268299
echo "Success $size <= $threshold"

CMakePresets.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,17 @@
104104
"Windows"
105105
]
106106
}
107+
},
108+
{
109+
"name": "zephyr",
110+
"displayName": "Build everything buildable on Zephyr RTOS",
111+
"inherits": [
112+
"common"
113+
],
114+
"cacheVariables": {
115+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/zephyr.cmake",
116+
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/zephyr/x86_64-linux-arm-zephyr-eabi-gcc.cmake"
117+
}
107118
}
108119
]
109120
}

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
from .convert_squeezes_to_view import ConvertSqueezesToViewPass # noqa
2424
from .convert_to_clamp import ConvertToClampPass # noqa
2525
from .decompose_acosh_pass import DecomposeAcoshPass # noqa
26+
from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass # noqa
2627
from .decompose_atan_pass import DecomposeAtanPass # noqa
2728
from .decompose_avg_pool2d import DecomposeAvgPool2d # noqa
2829
from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass # noqa
@@ -41,6 +42,7 @@
4142
from .decompose_ne_pass import DecomposeNotEqualPass # noqa
4243
from .decompose_round_pass import DecomposeRoundPass # noqa
4344
from .decompose_select import DecomposeSelectPass # noqa
45+
from .decompose_sign_pass import DecomposeSignPass # noqa
4446
from .decompose_silu_pass import DecomposeSiluPass # noqa
4547
from .decompose_sinh_pass import DecomposeSinhPass # noqa
4648
from .decompose_softmax_pass import DecomposeSoftmaxPass # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# LICENSE file in the root directory of this source tree.
77

88
# pyre-unsafe
9+
10+
import executorch.backends.arm.tosa.dialect # noqa: unused
911
from executorch.backends.arm._passes import (
1012
AddBiasPass,
1113
AnnotateChannelsLastDimOrder,
@@ -26,6 +28,7 @@
2628
ConvertSqueezesToViewPass,
2729
ConvertToClampPass,
2830
DecomposeAcoshPass,
31+
DecomposeAdaptiveAvgPool2dPass,
2932
DecomposeAtanPass,
3033
DecomposeAvgPool2d,
3134
DecomposeBatchNormNoStatsPass,
@@ -44,6 +47,7 @@
4447
DecomposeNotEqualPass,
4548
DecomposeRoundPass,
4649
DecomposeSelectPass,
50+
DecomposeSignPass,
4751
DecomposeSiluPass,
4852
DecomposeSinhPass,
4953
DecomposeSoftmaxPass,
@@ -124,6 +128,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
124128
if self.tosa_spec.is_U55_subset:
125129
self.add_pass(BroadcastArgsPass())
126130
self.add_pass(DecomposeLinearPass())
131+
self.add_pass(DecomposeAdaptiveAvgPool2dPass())
127132
self.add_pass(DecomposeAvgPool2d())
128133
self.add_pass(ComputeConstantOpsAOT(exported_program))
129134

@@ -158,6 +163,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
158163
self.add_pass(ConvertIntPowToMuls())
159164
self.add_pass(CastBoolToInt8Pass())
160165
self.add_pass(DecomposeSinhPass())
166+
self.add_pass(DecomposeSignPass())
161167
self.add_pass(ReplaceScalarWithTensorArgPassTOSAMI())
162168
self.add_pass(DecomposeEmbeddingPass())
163169
self.add_pass(FuseQuantizedActivationPass())
@@ -190,6 +196,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
190196
self.add_pass(RetraceFoldedDtypesPass())
191197
self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
192198
self.add_pass(MatchArgRanksPass(exported_program))
199+
self.add_pass(DecomposeAdaptiveAvgPool2dPass())
193200
self.add_pass(DecomposeAvgPool2d())
194201
self.add_pass(ComputeConstantOpsAOT(exported_program))
195202

@@ -242,6 +249,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
242249
self.add_pass(DecomposeScaledDotProductAttention())
243250
self.add_pass(DecomposeRoundPass())
244251
self.add_pass(CastBoolToInt8Pass())
252+
self.add_pass(DecomposeSignPass())
245253
self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
246254
self.add_pass(ScalarsToAttributePass())
247255
self.add_pass(DecomposeGroupNormPass())
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from math import ceil, floor
7+
8+
import torch
9+
10+
from executorch.backends.arm._passes import ArmPass
11+
12+
from executorch.exir.dialects._ops import ops as exir_ops
13+
14+
edge_ops = (exir_ops.edge.aten._adaptive_avg_pool2d.default,)
15+
aten_ops = (torch.ops.aten.adaptive_avg_pool2d.default,)
16+
17+
18+
def _get_decomposition(op) -> tuple:
19+
if op in edge_ops:
20+
return (
21+
exir_ops.edge.aten.avg_pool2d.default,
22+
exir_ops.edge.aten.slice_copy.Tensor,
23+
exir_ops.edge.aten.cat.default,
24+
)
25+
if op in aten_ops:
26+
return (
27+
torch.ops.aten.avg_pool2d.default,
28+
torch.ops.aten.slice_copy.Tensor,
29+
torch.ops.aten.cat.default,
30+
)
31+
raise RuntimeError(f"Unable to get decomposition for op {op}")
32+
33+
34+
class DecomposeAdaptiveAvgPool2dPass(ArmPass):
35+
"""
36+
Decomposes AdaptiveAvgPool2d into AvgPool2d operations.
37+
38+
An input tensor of shape (N, C, H, W) is transformed into an output tensor
39+
of shape (N, C, output_size_h, output_size_w).
40+
41+
The output is of size output_size_h x output_size_w for any input.
42+
"""
43+
44+
def call_operator(self, op, args, kwargs, meta, updated=False):
45+
if op not in (edge_ops + aten_ops):
46+
return super().call_operator(op, args, kwargs, meta, updated)
47+
48+
avg_pool2d_op, slice_op, cat_op = _get_decomposition(op)
49+
50+
x = args[0]
51+
_, _, input_size_h, input_size_w = x.data.shape
52+
53+
(output_size_h, output_size_w) = args[1]
54+
55+
# Vela currently only allows a stride in the interval of [1,3] for AvgPool2d.
56+
# To accommodate this, the AvgPool2d op is applied to pooling regions and the results are concatenated.
57+
58+
res = []
59+
for out_i in range(output_size_h):
60+
row = []
61+
for out_j in range(output_size_w):
62+
# Calculate pooling regions
63+
start_h = floor(out_i * input_size_h / output_size_h)
64+
end_h = ceil((out_i + 1) * input_size_h / output_size_h)
65+
start_w = floor(out_j * input_size_w / output_size_w)
66+
end_w = ceil((out_j + 1) * input_size_w / output_size_w)
67+
68+
# Slice along H
69+
x_h = super().call_operator(
70+
slice_op, (x, 2, start_h, end_h), kwargs, meta, True
71+
)
72+
# Slice along W
73+
x_hw = super().call_operator(
74+
slice_op, (x_h, 3, start_w, end_w), kwargs, meta, True
75+
)
76+
77+
# Apply avg pooling with kernel size equal to the pooling region
78+
kernel_h = end_h - start_h
79+
kernel_w = end_w - start_w
80+
pool_args = (x_hw, (kernel_h, kernel_w), (1, 1), (0, 0))
81+
pooled = super().call_operator(
82+
avg_pool2d_op, pool_args, kwargs, meta, True
83+
)
84+
row.append(pooled)
85+
86+
# Concatenate row results along width (dim=3)
87+
row_tensor = super().call_operator(cat_op, (row, 3), kwargs, meta, True)
88+
res.append(row_tensor)
89+
90+
# Concatenate all rows along height (dim=2)
91+
out = super().call_operator(cat_op, (res, 2), kwargs, meta, True)
92+
return out
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright 2025 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
import torch
7+
8+
from executorch.backends.arm._passes import ArmPass
9+
from executorch.exir.dialects._ops import ops as exir_ops
10+
11+
12+
# For MI case
13+
edge_sign = exir_ops.edge.aten.sign.default
14+
# For BI case
15+
aten_sign = torch.ops.aten.sign.default
16+
17+
18+
def get_ops(op):
19+
"""Returns the appropriate operator functions based on the input operator."""
20+
if op == edge_sign:
21+
return (
22+
exir_ops.edge.aten.gt.Scalar,
23+
exir_ops.edge.aten.lt.Scalar,
24+
exir_ops.edge.aten.where.self,
25+
exir_ops.edge.aten.neg.default,
26+
exir_ops.edge.aten.mul.Scalar,
27+
exir_ops.edge.aten.add.Scalar,
28+
)
29+
elif op == aten_sign:
30+
return (
31+
torch.ops.aten.gt.Scalar,
32+
torch.ops.aten.lt.Scalar,
33+
torch.ops.aten.where.self,
34+
torch.ops.aten.neg.default,
35+
torch.ops.aten.mul.Scalar,
36+
torch.ops.aten.add.Scalar,
37+
)
38+
else:
39+
raise ValueError(f"Unsupported operator: {op}")
40+
41+
42+
class DecomposeSignPass(ArmPass):
43+
"""Decomposes the sign operator into a sequence of operations that are supported by the Arm backend."""
44+
45+
def call_operator(self, op, args, kwargs, meta):
46+
if op not in (edge_sign, aten_sign):
47+
return super().call_operator(op, args, kwargs, meta)
48+
49+
gt_op, lt_op, where_op, neg_op, mul_op, add_op = get_ops(op)
50+
51+
x = args[0]
52+
53+
gt_mask = super().call_operator(gt_op, (x, 0.0), {}, meta, updated=True)
54+
lt_mask = super().call_operator(lt_op, (x, 0.0), {}, meta, updated=True)
55+
56+
zeros = super().call_operator(mul_op, (x, 0.0), {}, meta, updated=True)
57+
ones = super().call_operator(add_op, (zeros, 1.0), {}, meta, updated=True)
58+
neg_ones = super().call_operator(neg_op, (ones,), {}, meta, updated=True)
59+
60+
negative_tensor = super().call_operator(
61+
where_op, (lt_mask, neg_ones, zeros), {}, meta, updated=True
62+
)
63+
positive_tensor = super().call_operator(
64+
where_op, (gt_mask, ones, zeros), {}, meta, updated=True
65+
)
66+
67+
return super().call_operator(
68+
where_op,
69+
(lt_mask, negative_tensor, positive_tensor),
70+
{},
71+
meta,
72+
updated=True,
73+
)

0 commit comments

Comments
 (0)