pytorch
diff --git a/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 10 additions & 3 deletions b/‎backends/arm/_passes/arm_pass_manager.py‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎backends/arm/_passes/convert_full_like_to_full_pass.py‎
Lines changed: 33 additions & 0 deletions b/‎backends/arm/_passes/convert_full_like_to_full_pass.py‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py‎
Lines changed: 3 additions & 0 deletions b/‎backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎backends/arm/_passes/insert_rescales_pass.py‎
Lines changed: 109 additions & 0 deletions b/‎backends/arm/_passes/insert_rescales_pass.py‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operator_support/tosa_supported_operators.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎backends/arm/operators/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎backends/arm/operators/op_rescale.py‎
Lines changed: 70 additions & 0 deletions b/‎backends/arm/operators/op_rescale.py‎
Lines changed: 70 additions & 0 deletions
diff --git a/‎backends/arm/quantizer/quantization_annotator.py‎
Lines changed: 9 additions & 0 deletions b/‎backends/arm/quantizer/quantization_annotator.py‎
Lines changed: 9 additions & 0 deletions
@@ -1,6 +1,6 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -18,6 +18,9 @@
 from executorch.backends.arm._passes.convert_expand_copy_to_repeat import (
     ConvertExpandCopyToRepeatPass,
 )
+from executorch.backends.arm._passes.convert_full_like_to_full_pass import (
+    ConvertFullLikeToFullPass,
+)
 from executorch.backends.arm._passes.convert_split_to_slice import (
     ConvertSplitToSlicePass,
 )
@@ -49,6 +52,7 @@
 from executorch.backends.arm._passes.fuse_quantized_activation_pass import (  # type: ignore[import-not-found]
     FuseQuantizedActivationPass,
 )
+from executorch.backends.arm._passes.insert_rescales_pass import InsertRescalePass
 from executorch.backends.arm._passes.insert_table_ops import InsertTableOpsPass
 from executorch.backends.arm._passes.keep_dims_false_to_squeeze_pass import (
     KeepDimsFalseToSqueezePass,
@@ -72,6 +76,7 @@
     UnsqueezeScalarPlaceholdersPass,
 )
 from executorch.backends.arm.tosa_specification import TosaSpecification
+
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
 from executorch.exir.pass_manager import PassManager
@@ -95,6 +100,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
         self.add_pass(ConvertMeanDimToAveragePoolPass())
+        self.add_pass(ConvertFullLikeToFullPass())
 
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
@@ -115,7 +121,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertSqueezesToViewPass())
 
         self.add_pass(AnnotateChannelsLastDimOrder())
-
+        self.add_pass(InsertRescalePass())
         return self._transform(exported_program.graph_module)
 
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
@@ -133,7 +139,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertMeanDimToAveragePoolPass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeSoftmaxesPass())
-
+        self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
         self.add_pass(FoldAndAnnotateQParamsPass())  # type: ignore[call-arg]
@@ -153,6 +159,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertSqueezesToViewPass())
 
         self.add_pass(AnnotateChannelsLastDimOrder())
+        self.add_pass(InsertRescalePass())
 
         return self._transform(exported_program.graph_module)
 
 
@@ -0,0 +1,33 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+
+
+class ConvertFullLikeToFullPass(ExportPass):
+    """As per the full_like pytorch documentation,
+    `torch.full_like(input, fill_value)` is equivalent to
+    `torch.full(input.size(),
+                fill_value,
+                dtype=input.dtype,
+                layout=input.layout,
+                device=input.device
+                )`
+    Skip layout and device since it's not relevant for our backend.
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in [
+            exir_ops.edge.aten.full_like.default,
+        ]:
+            return super().call_operator(op, args, kwargs, meta)
+
+        tensor = args[0].data
+        full_args = (list(tensor.shape), args[1])
+        full_kwargs = {"dtype": tensor.dtype}
+        return super().call_operator(
+            exir_ops.edge.aten.full.default, full_args, full_kwargs, meta
+        )
@@ -131,6 +131,9 @@ def call(self, graph_module: GraphModule) -> PassResult:
             n = cast(Node, n)
             if n.op != "call_function":
                 continue
+            # Don't fold chains of quant-ops into each other.
+            if n.target in (q_op, dq_op):
+                continue
 
             # Make sure we haven't already set qparams meta information on the node
             assert "input_qparams" not in n.meta.keys()
 
@@ -0,0 +1,109 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+from copy import copy
+from typing import cast
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.backends.arm.tosa_quant_utils import dq_op, q_op, QuantArgs
+from executorch.exir.pass_base import ExportPass, PassResult
+from torch import Tensor
+from torch.fx import GraphModule, Node
+from torch.library import custom_op, register_fake
+
+logger = logging.getLogger(__name__)
+
+
+@custom_op("tosa::_rescale", mutates_args=())  # type: ignore[misc]
+def rescale(
+    x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
+) -> Tensor:
+    logger.warning(
+        "Ran default implementation of tosa::_rescale."
+        "This op is meant to always be inserted inside a partition and a correct default implementation is not implemented."
+    )
+    # Clone is needed to not return reference when rescaling to same dtype.
+    # This is a neccessary requirement for non-mutating custom ops.
+    return x.to(dtype=dtype).clone()
+
+
+@register_fake("tosa::_rescale")  # type: ignore[misc]
+def rescale_fake(
+    x: Tensor, dtype: torch.dtype, scale: float, in_zp: int, out_zp: int
+) -> Tensor:
+    """Casts the input tensor to dtype `dtype` to produce the correct tensor meta for a _rescale op.
+    Additionally validates TOSA constraints of a RESCALE op.
+    """
+    if not (dtype == torch.int32 or dtype == torch.int8):
+        raise NotImplementedError(
+            "tosa::rescale currently only supports int32 and int8."
+        )
+    if dtype == torch.int32 and out_zp != 0:
+        raise ValueError(
+            "TOSA requires output_zp to be zero when the output dtype is int32."
+        )
+    if x.dtype == torch.int32 and in_zp != 0:
+        raise ValueError(
+            "TOSA requires input_zp to be zero when the input dtype is int32."
+        )
+    if x.dtype == torch.int8 and not -128 <= in_zp <= 127:
+        raise ValueError(f"{in_zp=} outside valid range (-128,127) for int8.")
+    if dtype == torch.int8 and not -128 <= out_zp <= 127:
+        raise ValueError(f"{out_zp=} outside valid range (-128,127) for int8.")
+
+    return x.to(dtype=dtype).clone()
+
+
+class InsertRescalePass(ExportPass):
+    """Finds patterns of dq -> q, and replaces them
+    with passthrough_to_tosa::rescales.
+
+    Does not garantuee that the dtypes and zero points are valid
+    in TOSA, that is the job of the quantization annotator that
+    produced the dq and q nodes. The TOSA constraints are validated
+    in the fake implementation of passthrough_to_tosa:rescale.
+    """
+
+    def fold_dq_q_to_rescale(self, node: Node, user: Node, graph_module: GraphModule):
+        dq_args = QuantArgs.from_operator(node.target, node.args)
+        q_args = QuantArgs.from_operator(user.target, user.args)
+        new_scale = dq_args.scale / q_args.scale
+
+        with graph_module.graph.inserting_before(node):
+            rescale_node = create_node(
+                graph_module.graph,
+                torch.ops.tosa._rescale.default,
+                (
+                    node.all_input_nodes[0],
+                    q_args.dtype,
+                    new_scale,
+                    dq_args.zp,
+                    q_args.zp,
+                ),
+            )
+            rescale_node.meta = copy(user.meta)
+            user.replace_all_uses_with(rescale_node)
+            graph_module.graph.erase_node(user)
+
+    def call(self, graph_module: GraphModule) -> PassResult:
+        modified = False
+        for node in graph_module.graph.nodes:
+            node = cast(Node, node)
+
+            if node.target is not dq_op:
+                continue
+            # Copy users since we remove them while iterating, modyfing the node.users list.
+            for user in copy(node.users):
+                if user.target is q_op:
+                    self.fold_dq_q_to_rescale(node, user, graph_module)
+                    modified = True
+            if len(node.users) == 0:
+                graph_module.graph.erase_node(node)
+
+        graph_module = super().call(graph_module).graph_module
+        graph_module.recompile()
+        return PassResult(graph_module, modified)
@@ -105,6 +105,7 @@ def is_node_supported(self, submodules, node: fx.Node) -> bool:
             exir_ops.edge.aten.linear.default,
             exir_ops.edge.aten.split_with_sizes_copy.default,
             exir_ops.edge.aten.full.default,
+            exir_ops.edge.aten.full_like.default,
             exir_ops.edge.aten.ge.Tensor,
             exir_ops.edge.aten.gt.Tensor,
             exir_ops.edge.aten.le.Tensor,
 
@@ -32,6 +32,7 @@
     op_reciprocal,
     op_relu,
     op_repeat,
+    op_rescale,
     op_rshift,
     op_rsqrt,
     op_sigmoid,
 
@@ -0,0 +1,70 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+from typing import cast, List
+
+import executorch.backends.arm.tosa_quant_utils as tosa_quant_utils
+import serializer.tosa_serializer as ts  # type: ignore
+import torch
+
+import tosa.Op as TosaOp  # type: ignore
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
+from torch.fx import Node
+
+
+@register_node_visitor
+class RescaleVisitor(NodeVisitor):
+    target = "_rescale.default"
+
+    def define_node(
+        self,
+        node: Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+    ) -> None:
+
+        input_dtype = inputs[0].dtype
+        output_dtype = cast(torch.dtype, node.args[1])
+        scale = cast(float, node.args[2])
+        input_zp = cast(int, node.args[3])
+        output_zp = cast(int, node.args[4])
+
+        # Skip int16 cases for now.
+        if input_dtype != map_dtype(torch.int8) and input_zp != 0:
+            raise ValueError(
+                f"If input dtype is not int8, input_zp must be 0. Got input_dtype{ts.DTypeNames[input_dtype]}, {input_zp=}"
+            )
+        if output_dtype != torch.int8 and output_zp != 0:
+            raise ValueError(
+                f"If output dtype is not int8, output_zp must be 0. Got {output_dtype=}, {output_zp=}"
+            )
+
+        scale_width = 32 if output_dtype == torch.int32 else 16
+        multiplier, shift = tosa_quant_utils.compute_multiplier_and_shift(
+            scale, scale_width
+        )
+        attr_rescale = ts.TosaSerializerAttribute()
+        attr_rescale.RescaleAttribute(
+            input_zp=input_zp,
+            output_zp=output_zp,
+            multiplier=[multiplier],
+            shift=[shift],
+            scale32=output_dtype == torch.int32,
+            double_round=False,
+            per_channel=False,
+            input_unsigned=False,
+            output_unsigned=False,
+        )
+
+        tosa_graph.addOperator(
+            TosaOp.Op().RESCALE, [inputs[0].name], [output.name], attr_rescale
+        )
@@ -134,6 +134,7 @@ def _match_pattern(
     torch.ops.aten.sum.dim_IntList,
     torch.ops.aten.hardsigmoid.default,
     torch.ops.aten.hardswish.default,
+    torch.ops.aten.full_like.default,
 ]
 
 _one_to_one_shared_input_qspec = [
@@ -383,3 +384,11 @@ def annotate_graph(  # type: ignore[return]
             _annotate_output(node, quant_properties.quant_output)
 
         arm_quantizer_utils.mark_node_as_annotated(node)  # type: ignore[attr-defined]
+
+        # Quantization does not allow kwargs for some reason.
+        # Remove them from ops known to have kwargs, where dropping them does not break anything.
+        if node.target in [
+            torch.ops.aten.full_like.default,
+            torch.ops.aten.full.default,
+        ]:
+            node.kwargs = {}