pytorch
diff --git a/‎.ci/scripts/gather_benchmark_configs.py‎
Lines changed: 1 addition & 0 deletions b/‎.ci/scripts/gather_benchmark_configs.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/workflows/apple-perf-private-device-experiment.yml‎
Lines changed: 64 additions & 0 deletions b/‎.github/workflows/apple-perf-private-device-experiment.yml‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎.github/workflows/doc-build.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/doc-build.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/pull.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/pull.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/apple/mps/mps_preprocess.py‎
Lines changed: 15 additions & 1 deletion b/‎backends/apple/mps/mps_preprocess.py‎
Lines changed: 15 additions & 1 deletion
diff --git a/‎backends/arm/__init__.py‎
Lines changed: 10 additions & 0 deletions b/‎backends/arm/__init__.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎backends/arm/_passes/convert_expand_copy_to_repeat.py‎
Lines changed: 12 additions & 2 deletions b/‎backends/arm/_passes/convert_expand_copy_to_repeat.py‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎backends/arm/ethosu_backend.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/arm/ethosu_backend.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/ethosu_partitioner.py‎
Lines changed: 1 addition & 2 deletions b/‎backends/arm/ethosu_partitioner.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 2 additions & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 2 additions & 0 deletions
@@ -24,6 +24,7 @@
     "samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",
     "google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",
     "google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",
+    "apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",
 }
 
 # Predefined benchmark configurations
 
@@ -0,0 +1,64 @@
+name: apple-perf (private devices)
+
+on:
+  # TODO (huydhn): Disable the schedule run until we land the change to add device pool and device name
+  # to distinguish between public and private iOS devices
+  # schedule:
+  # - cron: 0 0,4,8,12,16,20 * * *
+  pull_request:
+    paths:
+      - .github/workflows/apple-perf-private-device-experiment.yml
+  # push:
+  #   branches:
+  #     - main
+  #   paths:
+  #     - .github/workflows/apple-perf-private-device-experiment.yml
+  # Note: GitHub has an upper limit of 10 inputs
+  workflow_dispatch:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: apple_iphone_15_private
+      benchmark_configs:
+        description: The list of configs used the benchmark
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      models:
+        description: Models to be benchmarked
+        required: false
+        type: string
+        default: mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8
+      devices:
+        description: Target devices to run benchmark
+        required: false
+        type: string
+        default: apple_iphone_15_private
+      benchmark_configs:
+        description: The list of configs used the benchmark
+        required: false
+        type: string
+
+concurrency:
+  group: apple-perf-private-devices-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }}
+  cancel-in-progress: true
+
+jobs:
+  apple:
+    uses: ./.github/workflows/apple-perf.yml
+    secrets: inherit
+    permissions:
+      id-token: write
+      contents: read
+    with:
+      models: ${{ inputs.models || 'mv3,meta-llama/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8,meta-llama/Llama-3.2-1B-Instruct-QLORA_INT4_EO8' }}
+      devices: apple_iphone_15_private
+      benchmark_configs: ${{ inputs.benchmark_configs }}
@@ -21,12 +21,12 @@ jobs:
       - name: Check URLs
         run: bash ./scripts/check_urls.sh
 
-  check-links:
+  check-xrefs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - name: Check Links
-        run: bash ./scripts/check_links.sh
+        run: bash ./scripts/check_xrefs.sh
 
   build:
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
 
@@ -399,7 +399,7 @@ jobs:
         size=${arr[4]}
         # threshold=48120 on devserver with gcc11.4
         # todo(lfq): update once binary size is below 50kb.
-        threshold="51504"
+        threshold="51408"
         if [[ "$size" -le "$threshold" ]]; then
           echo "Success $size <= $threshold"
         else
@@ -436,7 +436,7 @@ jobs:
         size=${arr[4]}
         # threshold=48120 on devserver with gcc11.4
         # todo(lfq): update once binary size is below 50kb.
-        threshold="51784"
+        threshold="47552"
         if [[ "$size" -le "$threshold" ]]; then
           echo "Success $size <= $threshold"
         else
 
@@ -6,6 +6,7 @@
 from typing import ClassVar, Dict, final, List, Tuple
 
 import torch
+from executorch import exir
 
 from executorch.backends.apple.mps.operators.node_visitor import (
     get_node_visitors,
@@ -35,6 +36,7 @@
 
 from executorch.exir.passes.memory_format_ops_pass import DimOrderOpsRevertPass
 from executorch.exir.program._program import _transform
+from executorch.exir.verification.verifier import EXIREdgeDialectVerifier
 from torch.export.exported_program import ExportedProgram
 
 FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
@@ -87,7 +89,19 @@ def preprocess(
         #    the `output_ids` array in the schema.
 
         # TODO: Remove this once we have a better support for the dim-order ops.
-        edge_program = _transform(edge_program, DimOrderOpsRevertPass())
+        # Override the verifier to keep the non-dim-order ops from tripping the default verifier.
+        edge_program = _transform(
+            edge_program,
+            DimOrderOpsRevertPass(),
+            override_verifiers=[
+                EXIREdgeDialectVerifier(
+                    edge_compile_config=exir.EdgeCompileConfig(
+                        _check_ir_validity=False,  # Disable the edge dialect verifier, since we are in the mps backend.
+                    ),
+                    class_only=True,
+                )
+            ],
+        )
 
         mps_graph = MPSGraph(
             version="0",
 
@@ -0,0 +1,10 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .arm_backend import ArmCompileSpecBuilder  # noqa  # usort: skip
+from .tosa_backend import TOSABackend  # noqa  # usort: skip
+from .tosa_partitioner import TOSAPartitioner  # noqa  # usort: skip
+from .ethosu_backend import EthosUBackend  # noqa  # usort: skip
+from .ethosu_partitioner import EthosUPartitioner  # noqa  # usort: skip
@@ -1,16 +1,18 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
-# All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
+import logging
 from typing import cast
 
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
+logger = logging.getLogger(__name__)
+
 
 class ConvertExpandCopyToRepeatPass(ExportPass):
     """
@@ -41,6 +43,14 @@ def call_operator(self, op, args, kwargs, meta):
             multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
             for i in range(expanded_rank)
         ]
+
+        if all((x == 1 for x in multiples)):
+            # All dimensions/repetitions occur only once. Remove node
+            # altogether since it's in practice just a copy.
+            logger.warning("Found redundant expand node (no-op). Removing it.")
+
+            return args[0]
+
         return super().call_operator(
             op=self.repeat, args=(args[0], multiples), kwargs=kwargs, meta=meta
         )
@@ -14,9 +14,9 @@
 import logging
 from typing import final, List
 
-from executorch.backends.arm.arm_vela import vela_compile
+from executorch.backends.arm import TOSABackend
 
-from executorch.backends.arm.tosa_backend import TOSABackend
+from executorch.backends.arm.arm_vela import vela_compile
 from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from torch.export.exported_program import ExportedProgram
 
@@ -10,8 +10,7 @@
 from executorch.backends.arm.arm_backend import (
     is_ethosu,
 )  # usort: skip
-from executorch.backends.arm.ethosu_backend import EthosUBackend
-from executorch.backends.arm.tosa_partitioner import TOSAPartitioner
+from executorch.backends.arm import EthosUBackend, TOSAPartitioner
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.backend.partitioner import DelegationSpec
 from torch.fx.passes.operator_support import OperatorSupportBase
 
@@ -207,6 +207,7 @@ def is_node_supported(
             exir_ops.edge.aten._log_softmax.default,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.tanh.default,
+            exir_ops.edge.aten.upsample_bilinear2d.vec,
             exir_ops.edge.aten.upsample_nearest2d.vec,
             exir_ops.edge.aten.var.correction,
             exir_ops.edge.aten.var.dim,
@@ -365,6 +366,7 @@ def is_node_supported(
             exir_ops.edge.aten.sigmoid.default,
             exir_ops.edge.aten.sub.Tensor,
             exir_ops.edge.aten.tanh.default,
+            exir_ops.edge.aten.upsample_bilinear2d.vec,
             exir_ops.edge.aten.upsample_nearest2d.vec,
             exir_ops.edge.aten.gelu.default,
         ):
Original file line number	Diff line number	Diff line change
`@@ -24,6 +24,7 @@`
`24`	`24`	`"samsung_galaxy_s24": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98f8788c-2e25-4a3c-8bb2-0d1e8897c0db",`
`25`	`25`	`"google_pixel_8_pro": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/d65096ab-900b-4521-be8b-a3619b69236a",`
`26`	`26`	`"google_pixel_3_private_rooted": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/98d23ca8-ea9e-4fb7-b725-d402017b198d",`
	`27`	`+ "apple_iphone_15_private": "arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/55929353-2f28-4ee5-bdff-d1a95f58cb28",`
`27`	`28`	`}`
`28`	`29`
`29`	`30`	`# Predefined benchmark configurations`