Commit b1248b6

Arm backend: Add full partition rejections

- Reject partitions that will be lowered to empty subgraphs, i.e. partitions containing only clones/no-op expands.
- Fix a bug, exposed by the partition fix, that caused an infinite recursive search for qparams when mixing shared-qspec ops and arithmetic ops.
- For eye/ones/zeros the graph is not actually empty, since it contains one constant, and this now works. Simply move the previously xfailing tests to MI/BI. (U55/U85 still fail because of missing CPU ops.)
- Reject partitions with unsupported memory formats instead of failing on them.

Signed-off-by: Adrian Lundell <[email protected]>
Change-Id: Iefa034a8e731d70465eb4883602c958f51aca976

1 parent 275adee commit b1248b6

16 files changed: 437 additions, 190 deletions

backends/arm/_passes/convert_expand_copy_to_repeat.py

Lines changed: 32 additions & 17 deletions
@@ -8,12 +8,43 @@
 import logging
 from typing import cast
 
+import torch
+
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
 logger = logging.getLogger(__name__)
 
 
+def calculate_multiples(args):
+    input_node_or_tensor = args[0]
+
+    if isinstance(input_node_or_tensor, torch.fx.node.Node):
+        input_data = input_node_or_tensor.meta["val"]
+    else:
+        input_data = input_node_or_tensor.data
+
+    input_shape = input_data.shape
+
+    multiples = cast(list[int], args[1])
+    expanded_rank = len(multiples)
+
+    # Expanded shape is 'input_shape' front-padded with ones.
+    padding = expanded_rank - len(input_shape)
+    extended_shape = [
+        input_shape[i] if i >= 0 else 1 for i in range(-padding, len(input_shape))
+    ]
+
+    # To convert expand arg to repeat arg, non-repeated dims should have
+    # multiples[dim] = 1. Passing -1 to expand arg means
+    # not changing the size of that dimension.
+    multiples = [
+        multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
+        for i in range(expanded_rank)
+    ]
+    return multiples
+
+
 class ConvertExpandCopyToRepeatPass(ExportPass):
     """
     Replace expand copy with repeat since it is a repeat that can only repeat singleton dimensions.
@@ -26,23 +57,7 @@ def call_operator(self, op, args, kwargs, meta):
         if op != self.expand_copy:
             return super().call_operator(op, args, kwargs, meta)
 
-        input_shape = args[0].data.shape
-        multiples = cast(list[int], args[1])
-        expanded_rank = len(multiples)
-
-        # Expanded shape is 'input_shape' front-padded with ones.
-        padding = expanded_rank - len(input_shape)
-        extended_shape = [
-            input_shape[i] if i >= 0 else 1 for i in range(-padding, len(input_shape))
-        ]
-
-        # To convert expand arg to repeat arg, non-repeated dims should have
-        # multiples[dim] = 1. Passing -1 to expand arg means
-        # not changing the size of that dimension.
-        multiples = [
-            multiples[i] if multiples[i] != -1 and extended_shape[i] == 1 else 1
-            for i in range(expanded_rank)
-        ]
+        multiples = calculate_multiples(args)
 
         if all((x == 1 for x in multiples)):
             # All dimensions/repetitions occur only once. Remove node
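
A quick worked example (hypothetical shapes, not taken from the commit) tracing calculate_multiples by hand: only singleton input dimensions keep their expand factor, every other entry collapses to 1, so the rewritten repeat reproduces the expand exactly.

import torch

x = torch.randn(2, 1)
size = [3, 2, 2]  # args[1] of expand_copy: the target size (-1 would mean "keep")
# padding = 3 - 2 = 1, extended_shape = [1, 2, 1], so multiples = [3, 1, 2]
assert torch.equal(x.expand(size), x.repeat(3, 1, 2))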

backends/arm/_passes/remove_clone_pass.py

Lines changed: 10 additions & 0 deletions
@@ -6,9 +6,13 @@
 
 # pyre-unsafe
 
+import logging
+
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
+logger = logging.getLogger(__name__)
+
 
 class RemoveClonePass(ExportPass):
     """Remove all clones from graph_module"""
@@ -21,4 +25,10 @@ def call_operator(self, op, args, kwargs, meta):
             raise ValueError(
                 f"clone operator expects exactly one argument, got {len(args)}"
             )
+
+        if "memory_format" in kwargs:
+            logger.warning(
+                f"Removing clone with memory_format '{kwargs['memory_format']}'."
+            )
+
         return args[0]
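
As a usage note, a minimal sketch (hypothetical module, not from the commit) of the kind of clone this pass removes with a warning; per the dim-order tests further down, the backend handles layout separately, so the memory_format hint carried by the clone is simply dropped.

import torch

class WithClone(torch.nn.Module):
    def forward(self, x):
        # exports to aten.clone with a memory_format kwarg; RemoveClonePass
        # replaces the node with its input and logs the warning above
        return x.clone(memory_format=torch.contiguous_format) + 1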

backends/arm/operator_support/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 # pyre-unsafe
 
 from . import (  # noqa
+    clone_support,
     convolution_support,
     embedding_support,
     ethos_u55_support,

backends/arm/operator_support/clone_support.py

Lines changed: 39 additions & 0 deletions
@@ -0,0 +1,39 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+import torch.fx as fx
+from executorch.backends.arm.operator_support.tosa_supported_operators import (
+    register_tosa_support_check,
+    SupportedTOSAOperatorCheck,
+)
+from executorch.backends.arm.tosa_specification import TosaSpecification
+from executorch.exir.dialects._ops import ops as exir_ops
+
+logger = logging.getLogger(__name__)
+
+
+@register_tosa_support_check
+class CloneSupported(SupportedTOSAOperatorCheck):
+    targets = [exir_ops.edge.aten.clone.default]
+
+    tosa_specs = [
+        TosaSpecification.create_from_string("TOSA-0.80+BI"),
+        TosaSpecification.create_from_string("TOSA-0.80+MI"),
+        TosaSpecification.create_from_string("TOSA-1.0+INT"),
+        TosaSpecification.create_from_string("TOSA-1.0+FP"),
+    ]
+
+    def is_node_tosa_supported(
+        self, node: fx.Node, tosa_spec: TosaSpecification
+    ) -> bool:
+
+        input_node = node.args[0]
+        if not isinstance(input_node, fx.Node):
+            self.reporter.report_reject(node, "Non tensor clones are not supported")
+            return False
+
+        return True
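
A comment-only note (my reading of the guard, not stated in the commit) on what this check filters out:

# node.args[0] of aten.clone is normally an fx.Node producing a tensor. If a
# graph ever carries a non-Node argument there (e.g. a constant baked into
# the args), the clone cannot be lowered to TOSA, so the partitioner rejects
# it up front with "Non tensor clones are not supported" instead of failing
# later during lowering.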

backends/arm/operator_support/to_copy_support.py

Lines changed: 1 addition & 23 deletions
@@ -113,30 +113,8 @@ def is_node_tosa_supported(
                 f"Output dtype {output_val.dtype} is not supported in "
                 f"{node.target} for input dtype {input_dtype}. "
                 f"Supported output types: "
-                f"{''.join(str(t) for t in supported_dtypes[input_dtype])}",
+                f"{' '.join(str(t) for t in supported_dtypes[input_dtype])}",
             )
             return False
 
-        # Check memory format (to_copy)
-        if "memory_format" in node.kwargs:
-            if node.kwargs["memory_format"] in (torch.preserve_format,):
-                self.reporter.report_reject(
-                    node,
-                    f"Argument 'memory_format' is not supported for "
-                    f"{node.target} right now.",
-                )
-                return False
-
-        # Check dim_order (to_dim_order_copy)
-        if "dim_order" in node.kwargs:
-            dim_order = node.kwargs["dim_order"]
-            # pyre-ignore[6]
-            if dim_order != list(range(len(dim_order))):  # type: ignore[arg-type]
-                self.reporter.report_reject(
-                    node,
-                    f"Argument {dim_order=} is not supported for "
-                    f"{node.target} right now.",
-                )
-                return False
-
         return True

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 0 additions & 1 deletion
@@ -228,7 +228,6 @@ def is_node_supported(
             exir_ops.edge.aten.var.correction,
             exir_ops.edge.aten.var.dim,
             exir_ops.edge.aten.view_copy.default,
-            exir_ops.edge.aten.clone.default,
             exir_ops.edge.aten.unsqueeze_copy.default,
             exir_ops.edge.aten.squeeze_copy.dims,
             exir_ops.edge.aten.pow.Tensor_Scalar,

backends/arm/quantizer/quantization_annotator.py

Lines changed: 33 additions & 7 deletions
@@ -415,14 +415,40 @@ def any_or_hardtanh_min_zero(n: Node):
         torch.ops.aten.minimum.default,
         torch.ops.aten.maximum.default,
     ):
-        shared_qspec = SharedQuantizationSpec((node.args[0], node))  # type: ignore[arg-type]
-        quant_properties.quant_inputs = [
-            _QuantProperty(0, input_act_qspec),
-            _QuantProperty(
-                1, input_act_qspec if node.args[0] == node.args[1] else shared_qspec  # type: ignore[arg-type]
-            ),
-        ]
+
+        same_input = node.args[0] == node.args[1]
+
+        # Handle an edge case leading to an infinite recursion of shared qspecs
+        input_0_has_quant_info = (
+            hasattr(node.args[0], "meta")
+            and "quantization_annotation" in node.args[0].meta  # type: ignore[union-attr]
+        )
+        input_0_shared = input_0_has_quant_info and isinstance(  # type: ignore[union-attr]
+            node.args[0].meta["quantization_annotation"].output_qspec,  # type: ignore[union-attr]
+            SharedQuantizationSpec,  # type: ignore[union-attr]
+        )
+
+        if same_input:
+            shared_qspec = SharedQuantizationSpec((node.args[0], node))  # type: ignore[arg-type]
+            quant_properties.quant_inputs = [
+                _QuantProperty(0, input_act_qspec),  # type: ignore[arg-type]
+                _QuantProperty(1, input_act_qspec),  # type: ignore[arg-type]
+            ]
+        elif input_0_shared:
+            shared_qspec = SharedQuantizationSpec((node.args[1], node))  # type: ignore[arg-type]
+            quant_properties.quant_inputs = [
+                _QuantProperty(0, shared_qspec),  # type: ignore[arg-type]
+                _QuantProperty(1, input_act_qspec),  # type: ignore[arg-type]
+            ]
+        else:
+            shared_qspec = SharedQuantizationSpec((node.args[0], node))  # type: ignore[arg-type]
+            quant_properties.quant_inputs = [
+                _QuantProperty(0, input_act_qspec),  # type: ignore[arg-type]
+                _QuantProperty(1, shared_qspec),  # type: ignore[arg-type]
+            ]
+
         quant_properties.quant_output = _QuantProperty(0, shared_qspec)  # type: ignore[arg-type]
+
     elif node.target in (torch.ops.aten.where.self,):
         shared_qspec = SharedQuantizationSpec(node.args[1])  # type: ignore[arg-type]
         quant_properties.quant_inputs = [
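
A comment-only sketch (hypothetical graph, my reading of the guarded edge case) of why an input that already carries a SharedQuantizationSpec gets the special branch:

# m = torch.minimum(x, x)    # shared-qspec op: m's output qspec is a
#                            # SharedQuantizationSpec anchored on x
# out = torch.minimum(m, y)  # before the fix, input 1 (y) always shared from
#                            # input 0 (m), whose output qspec is itself shared,
#                            # so resolving qparams could chase shared specs
#                            # without terminating
#
# With the fix, when input 0 already carries a SharedQuantizationSpec, the
# sharing is anchored on input 1 instead, so every chain of shared specs ends
# at a concrete quantization spec.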

backends/arm/scripts/parse_test_names.py

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@
     "unflatten.int",
     "_native_batch_norm_legit_no_training.default",
     "_native_batch_norm_legit.no_stats",
+    "alias_copy.default",
 ]
 ALL_EDGE_OPS = SAMPLE_INPUT.keys() | CUSTOM_EDGE_OPS

backends/arm/test/misc/test_dim_order_guards.py

Lines changed: 84 additions & 39 deletions
@@ -6,62 +6,107 @@
 
 from typing import Tuple
 
-import pytest
-
 import torch
-from executorch.backends.arm.test import common
 
 from executorch.backends.arm.test.tester.test_pipeline import (
-    TosaPipelineBI,
+    OpNotSupportedPipeline,
     TosaPipelineMI,
 )
 
 
-input_t1 = Tuple[torch.Tensor]  # Input x
+input_t1 = Tuple[torch.Tensor, torch.Tensor]  # Inputs x, y
+
+
+class ChannelsLastInput(torch.nn.Module):
+    """
+    Test rejection of a partition which has a channels-last input.
+    """
+
+    inputs: input_t1 = (
+        torch.randn(1, 2, 2, 2).to(memory_format=torch.channels_last),
+        torch.randn(1, 2, 2, 2),
+    )
+
+    def forward(self, x, y):
+        x = x * y
+        x = x.to(dtype=torch.int32, memory_format=torch.channels_last)
+        x = x / 2
+        return x, y
+
+
+class ChannelsLastOutput(torch.nn.Module):
+    """
+    Test rejection of a partition which has a channels-last output.
+    """
+
+    inputs: input_t1 = (
+        torch.randn(1, 2, 2, 2),
+        torch.randn(1, 2, 2, 2),
+    )
 
+    def forward(self, x, y):
+        x = x * y
+        x = x.clone(memory_format=torch.channels_last)
+        x = x / 2
+        return x, y
 
-class Conv2D(torch.nn.Module):
-    inputs: dict[str, input_t1] = {
-        "randn": (torch.randn(1, 2, 20, 20),),
-    }
+
+class ChannelsLastInsidePartition(torch.nn.Module):
+    """
+    Test non-rejection of a fully partitioned module which changes memory format
+    inside the partition. The TOSA backend ignores this memory format change, and
+    since the input and output have the expected channels_last memory format, the
+    partition should be accepted.
+    """
+
+    inputs: input_t1 = (
+        torch.randn(1, 2, 2, 2),
+        torch.randn(1, 2, 2, 2),
+    )
 
     def __init__(self):
         super().__init__()
-        self.conv2d = torch.nn.Conv2d(in_channels=2, out_channels=3, kernel_size=(3, 3))
+        self.conv = torch.nn.Conv2d(2, 2, kernel_size=1, bias=False)
 
-    def forward(self, x):
-        return self.conv2d(x.to(memory_format=torch.channels_last))
+    def forward(self, x, y):
+        x = x * y
+        x = x.to(memory_format=torch.channels_last)
+        x = self.conv(x)
+        x = x.clone(memory_format=torch.contiguous_format)
+        return x, y
 
 
-@common.parametrize("test_data", Conv2D.inputs)
-def test_tosa_MI_pipeline(test_data: input_t1):
-    module = Conv2D()
+def test_dim_order_ok():
     pipeline = TosaPipelineMI[input_t1](
-        module,
-        test_data,
-        [],
-        [],
-        use_to_edge_transform_and_lower=False,
+        ChannelsLastInsidePartition(), ChannelsLastInsidePartition.inputs, []
     )
-    pos = pipeline.find_pos("partition")
-    pipeline._stages = pipeline._stages[:pos]
     pipeline.run()
-    with pytest.raises(RuntimeError):
-        pipeline.tester.partition()
-
-
-@common.parametrize("test_data", Conv2D.inputs)
-def test_tosa_BI_pipeline(test_data: input_t1):
-    module = Conv2D()
-    pipeline = TosaPipelineBI[input_t1](
-        module,
-        test_data,
-        [],
-        [],
-        use_to_edge_transform_and_lower=False,
+
+
+def test_channels_last_input():
+    pipeline = OpNotSupportedPipeline[input_t1](
+        ChannelsLastInput(),
+        ChannelsLastInput.inputs,
+        non_delegated_ops={},
+        n_expected_delegates=0,
+    )
+    pipeline.run()
+
+
+def test_channels_last_output():
+    pipeline = OpNotSupportedPipeline[input_t1](
+        ChannelsLastOutput(),
+        ChannelsLastOutput.inputs,
+        non_delegated_ops={},
+        n_expected_delegates=0,
     )
-    pos = pipeline.find_pos("partition")
-    pipeline._stages = pipeline._stages[:pos]
     pipeline.run()
-    with pytest.raises(RuntimeError):
-        pipeline.tester.partition()
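
For readers less familiar with PyTorch memory formats, a small standalone check (an illustration, not part of the commit) of what distinguishes ChannelsLastInput's two inputs:

import torch

x = torch.randn(1, 2, 2, 2).to(memory_format=torch.channels_last)
y = torch.randn(1, 2, 2, 2)

# Same logical shape, different physical layout: x is laid out NHWC, y NCHW.
# The partitioner now rejects partitions whose inputs or outputs carry the
# channels_last layout instead of erroring out during lowering.
assert x.is_contiguous(memory_format=torch.channels_last)
assert y.is_contiguous()  # default torch.contiguous_format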
