pytorch
diff --git a/‎.ci/scripts/gather_benchmark_configs.py‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/gather_benchmark_configs.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/apple-perf-private-device-experiment.yml‎
Lines changed: 64 additions & 0 deletions b/‎.github/workflows/apple-perf-private-device-experiment.yml‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/__init__.py‎
Lines changed: 10 additions & 0 deletions b/‎backends/arm/__init__.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎backends/arm/_passes/convert_expand_copy_to_repeat.py‎
Lines changed: 12 additions & 2 deletions b/‎backends/arm/_passes/convert_expand_copy_to_repeat.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎backends/arm/ethosu_backend.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/arm/ethosu_backend.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/ethosu_partitioner.py‎
Lines changed: 1 addition & 2 deletions b/‎backends/arm/ethosu_partitioner.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 2 additions & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/operators/op_upsample_bilinear2d.py‎
Lines changed: 100 additions & 0 deletions b/‎backends/arm/operators/op_upsample_bilinear2d.py‎
Lines changed: 100 additions & 0 deletions
@@ -24,6 +24,7 @@
     "samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
     "google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
     "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
+    "apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",
 }
 
 # Predefined benchmark configurations
 
@@ -0,0 +1,64 @@
+name: apple-perf (private devices)
+
+on:
+  # TODO (huydhn): Disable the schedule run until we land the change to add device pool and device name
+  # to separate between public and private iOS devices
+  # schedule:
+  # - cron: 0 0,4,8,12,16,20 * * *
+  pull_request:
+    paths:
+      - .github/workflows/apple-perf-private-device-experiment.yml
+  # push:
+  #   branches:
+  #     - main
+  #   paths:
+  #     - .github/workflows/apple-perf-private-device-experiment.yml
+  # Note: GitHub has an upper limit of 10 inputs
+  workflow_dispatch:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: apple_iphone_15_private
+      benchmark_configs:
+        description: The list of configs used by the benchmark
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: apple_iphone_15_private
+      benchmark_configs:
+        description: The list of configs used by the benchmark
+        required: false
+        type: string
+
+concurrency:
+  group: apple-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  apple:
+    # Delegate the benchmark run to the shared apple-perf reusable workflow.
+    uses: ./.github/workflows/apple-perf.yml
+    secrets: inherit
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      # `inputs` is only populated for workflow_dispatch / workflow_call; the
+      # `||` fallbacks below supply values for pull_request runs.
+      models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
+      # Forward the declared `devices` input rather than silently ignoring it;
+      # the fallback keeps the previous hard-coded target as the default.
+      devices: ${{ inputs.devices || 'apple_iphone_15_private' }}
+      benchmark_configs: ${{ inputs.benchmark_configs }}
@@ -399,7 +399,7 @@ jobs:
         size=${arr[4]}
         # threshold=48120 on devserver with gcc11.4
         # todo(lfq): update once binary size is below 50kb.
-        threshold="51504"
+        threshold="51408"
         if [[ "$size" -le "$threshold" ]]; then
           echo "Success $size <= $threshold"
         else
@@ -436,7 +436,7 @@ jobs:
         size=${arr[4]}
         # threshold=48120 on devserver with gcc11.4
         # todo(lfq): update once binary size is below 50kb.
-        threshold="51784"
+        threshold="47552"
         if [[ "$size" -le "$threshold" ]]; then
           echo "Success $size <= $threshold"
         else
 
@@ -0,0 +1,10 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .arm_backend import ArmCompileSpecBuilder  # noqa  # usort: skip
+from .tosa_backend import TOSABackend  # noqa  # usort: skip
+from .tosa_partitioner import TOSAPartitioner  # noqa  # usort: skip
+from .ethosu_backend import EthosUBackend  # noqa  # usort: skip
+from .ethosu_partitioner import EthosUPartitioner  # noqa  # usort: skip
@@ -1,16 +1,18 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
+import logging
 from typing import cast
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
+logger = logging.getLogger(__name__)
+
 
 class ConvertExpandCopyToRepeatPass(ExportPass):
     """
@@ -41,6 +43,14 @@ def call_operator(self, op, args, kwargs, meta):
             multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
             for i in range(expanded_rank)
         ]
+
+        if all((x == 1 for x in multiples)):
+            # All dimensions/repetitions occur only once. Remove node
+            # altogether since it's in practice just a copy.
+            logger.warning("Found redundant expand node (no-op). Removing it.")
+
+            return args[0]
+
         return super().call_operator(
             op=self.repeat, args=(args[0], multiples), kwargs=kwargs, meta=meta
         )
@@ -14,9 +14,9 @@
 import logging
 from typing import final, List
 
-from executorch.backends.arm.arm_vela import vela_compile
+from executorch.backends.arm import TOSABackend
 
-from executorch.backends.arm.tosa_backend import TOSABackend
+from executorch.backends.arm.arm_vela import vela_compile
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
 
@@ -10,8 +10,7 @@
 from executorch.backends.arm.arm_backend import (
     is_ethosu,
 )  # usort: skip
-from executorch.backends.arm.ethosu_backend import EthosUBackend
-from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
+from executorch.backends.arm import EthosUBackend, TOSAPartitioner
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.backend.partitioner import DelegationSpec
 from torch.fx.passes.operator_support import OperatorSupportBase
 
@@ -207,6 +207,7 @@ def is_node_supported(
             exir_ops.edge.aten._log_softmax.default,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.tanh.default,
+            exir_ops.edge.aten.upsample_bilinear2d.vec,
             exir_ops.edge.aten.upsample_nearest2d.vec,
             exir_ops.edge.aten.var.correction,
             exir_ops.edge.aten.var.dim,
@@ -365,6 +366,7 @@ def is_node_supported(
             exir_ops.edge.aten.sigmoid.default,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.tanh.default,
+            exir_ops.edge.aten.upsample_bilinear2d.vec,
             exir_ops.edge.aten.upsample_nearest2d.vec,
             exir_ops.edge.aten.gelu.default,
         ):
 
@@ -46,6 +46,7 @@
     op_to_copy,
     op_to_dim_order_copy,
     op_transpose,
+    op_upsample_bilinear2d,
     op_upsample_nearest2d,
     op_view,
     op_where,
 
@@ -0,0 +1,100 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import List
+
+import torch
+
+import tosa_tools.v0_80.serializer.tosa_serializer as ts  # type: ignore
+
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_quant_utils import build_rescale
+from executorch.backends.arm.tosa_utils import get_resize_parameters, tosa_shape
+from tosa_tools.v0_80.tosa.ResizeMode import ResizeMode  # type: ignore
+
+
+@register_node_visitor
+class UpsampleBilinear2dVisitor_0_80(NodeVisitor):
+    target = "aten.upsample_bilinear2d.vec"
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+        assert (
+            inputs[0].shape is not None and output.shape is not None
+        ), "Only static shapes are supported"
+
+        input_dtype = inputs[0].dtype
+
+        # tosa_shape output is NHWC, take HW
+        input_size_yx = torch.tensor(
+            tosa_shape(inputs[0].shape, inputs[0].dim_order)[1:3]
+        )
+        # Ignore scale and size parameters, directly use the output size as
+        # we only support static shapes currently
+        output_size_yx = torch.tensor(tosa_shape(output.shape, output.dim_order)[1:3])
+
+        scale_n_yx, scale_d_yx, offset_yx, border_yx = get_resize_parameters(
+            input_size_yx, output_size_yx, ResizeMode.NEAREST, align_corners=True
+        )
+
+        def in_int16_range(x):
+            return torch.all(x >= -(2**15)) and torch.all(x <= 2**15 - 1)
+
+        assert in_int16_range(scale_n_yx)
+        assert in_int16_range(scale_d_yx)
+        assert in_int16_range(border_yx)
+
+        attr = ts.TosaSerializerAttribute()
+        attr.ResizeAttribute(
+            scale=[scale_n_yx[0], scale_d_yx[0], scale_n_yx[1], scale_d_yx[1]],
+            offset=offset_yx.tolist(),
+            border=border_yx.tolist(),
+            mode=ResizeMode.BILINEAR,
+        )
+
+        if input_dtype == output.dtype == ts.DType.FP32:
+            tosa_graph.addOperator(
+                ts.TosaOp.Op().RESIZE, [inputs[0].name], [output.name], attr
+            )
+            return
+        elif input_dtype == output.dtype == ts.DType.INT8:
+            intermediate = tosa_graph.addIntermediate(
+                tosa_shape(output.shape, output.dim_order), ts.DType.INT32
+            )
+
+            tosa_graph.addOperator(
+                ts.TosaOp.Op().RESIZE, [inputs[0].name], [intermediate.name], attr
+            )
+
+            final_output_scale = float(1 / (scale_n_yx[0] * scale_n_yx[1]))
+
+            build_rescale(
+                tosa_fb=tosa_graph,
+                scale=[final_output_scale],
+                input_node=intermediate,
+                output_name=output.name,
+                output_type=ts.DType.INT8,
+                output_shape=output.shape,
+                input_zp=0,
+                output_zp=0,
+                is_double_round=False,
+            )
+        else:
+            raise ValueError(
+                "Input/output dtype not in {float32, int8}: {input_dtype=} {output.dtype=}"
+            )
Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,7 @@`
`24`	`24`	`"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",`
`25`	`25`	`"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",`
`26`	`26`	`"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",`
	`27`	`+ "apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",`
`27`	`28`	`}`
`28`	`29`
`29`	`30`	`# Predefined benchmark configurations`