Arm backend: Move rescales from MAXIMUM/MINIMUM visitor to pass

Martin Lindström · Martin Lindström · commit 463ed4a2700c · 2025-10-01T10:39:19.000+02:00
Signed-off-by: Martin Lindström &lt;Martin.Lindstroem@arm.com&gt;
Change-Id: I5ee8f97590ce599a9dfc60ced0775654fa565c4e
diff --git a/backends/arm/_passes/insert_rescales_pass.py b/backends/arm/_passes/insert_rescales_pass.py
@@ -91,6 +91,8 @@ class InsertRescaleInt32Pass(ArmPass):
         exir_ops.edge.aten.gt.Tensor,
         exir_ops.edge.aten.le.Tensor,
         exir_ops.edge.aten.lt.Tensor,
+        exir_ops.edge.aten.maximum.default,
+        exir_ops.edge.aten.minimum.default,
     ]
 
     def _int32_qargs(self, s):
@@ -121,6 +123,8 @@ def _get_inputs_rescaled_qparams(
             exir_ops.edge.aten.gt.Tensor,
             exir_ops.edge.aten.le.Tensor,
             exir_ops.edge.aten.lt.Tensor,
+            exir_ops.edge.aten.minimum.default,
+            exir_ops.edge.aten.maximum.default,
         ]:
             # For these ops, use the smallest scale among the INT8 operands.
             min_scale = min(
@@ -142,6 +146,8 @@ def _get_output_qparams(
 
         if target in [
             exir_ops.edge.aten.abs.default,
+            exir_ops.edge.aten.maximum.default,
+            exir_ops.edge.aten.minimum.default,
         ]:
             # The op has not altered the scale; the output scale is equal to
             # the operands' scales.
diff --git a/backends/arm/operators/op_maximum.py b/backends/arm/operators/op_maximum.py
@@ -7,12 +7,6 @@
 
 from typing import Any, List
 
-import executorch.backends.arm.tosa.quant_utils as tqutils
-
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
-    get_input_qparams,
-)
-
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
@@ -22,9 +16,8 @@
     validate_same_dtype,
     validate_valid_dtype,
 )
-from executorch.backends.arm.tosa import TosaSpecification
 from executorch.backends.arm.tosa.mapping import TosaArg
-from executorch.backends.arm.tosa.utils import tosa_shape
+from executorch.backends.arm.tosa.specification import TosaSpecification
 from torch.fx import Node
 
 
@@ -56,51 +49,22 @@ def define_node(
         validate_valid_dtype(
             self.target,
             [*inputs, output],
-            [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
+            [ts.DType.INT32, ts.DType.FP32],
             output.tosa_spec,
         )
 
-        scale_back = 1.0
-        max_output = output
-        if inputs[0].dtype == ts.DType.INT8:
-            input_qparams = get_input_qparams(node)
-            if len(input_qparams) != 2:
-                raise ValueError(
-                    f"Both inputs need to have quantization information for {node}"
-                )
-            if input_qparams[0] != input_qparams[1]:
-                raise ValueError(
-                    "Both inputs must have the same quantization parameters for MAX"
-                )
-
-            operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
-                tosa_graph, inputs, node, self.tosa_spec
-            )
-
-            output.shape = tosa_shape(output.shape, output.dim_order)
-            max_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
-        else:
-            operand_inputs = inputs
-
         attr_maximum = ts.TosaSerializerAttribute()
-
-        # Set to PROPOGATE as default
+        # Set to PROPAGATE as default
         attr_maximum.MaximumAttribute(nan_mode=NanPropagationMode.PROPAGATE)
 
         self._serialize_operator(
             node,
             tosa_graph,
             ts.TosaOp.Op().MAXIMUM,
             [
-                operand_inputs[0].name,
-                operand_inputs[1].name,
+                inputs[0].name,
+                inputs[1].name,
             ],
-            [max_output.name],
+            [output.name],
             attr_maximum,
         )
-
-        if output.dtype == ts.DType.INT8:
-            # insert RESCALE from int32 back to int8
-            tqutils.insert_rescale_op_to_int8(
-                tosa_graph, max_output, scale_back, node, self.tosa_spec
-            )
diff --git a/backends/arm/operators/op_minimum.py b/backends/arm/operators/op_minimum.py
@@ -7,11 +7,6 @@
 
 from typing import Any, List
 
-import executorch.backends.arm.tosa.quant_utils as tqutils
-
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
-    get_input_qparams,
-)
 from executorch.backends.arm.operators.node_visitor import (
     NodeVisitor,
     register_node_visitor,
@@ -23,7 +18,6 @@
 )
 from executorch.backends.arm.tosa import TosaSpecification
 from executorch.backends.arm.tosa.mapping import TosaArg
-from executorch.backends.arm.tosa.utils import tosa_shape
 from torch.fx import Node
 
 
@@ -55,51 +49,22 @@ def define_node(
         validate_valid_dtype(
             self.target,
             [*inputs, output],
-            [ts.DType.INT8, ts.DType.INT32, ts.DType.FP32],
+            [ts.DType.INT32, ts.DType.FP32],
             output.tosa_spec,
         )
 
-        scale_back = 1.0
-        min_output = output
-        if inputs[0].dtype == ts.DType.INT8:
-            input_qparams = get_input_qparams(node)
-            if len(input_qparams) != 2:
-                raise ValueError(
-                    f"Both inputs need to have quantization information for {node}"
-                )
-            if input_qparams[0] != input_qparams[1]:
-                raise ValueError(
-                    "Both inputs must have the same quantization parameters for MIN"
-                )
-
-            operand_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
-                tosa_graph, inputs, node, self.tosa_spec
-            )
-
-            output.shape = tosa_shape(output.shape, output.dim_order)
-            min_output = tosa_graph.addIntermediate(output.shape, ts.DType.INT32)
-        else:
-            operand_inputs = inputs
-
         attr_minimum = ts.TosaSerializerAttribute()
-
-        # Set to PROPOGATE as default
+        # Set to PROPAGATE as default
         attr_minimum.MinimumAttribute(nan_mode=NanPropagationMode.PROPAGATE)
 
         self._serialize_operator(
             node,
             tosa_graph,
             ts.TosaOp.Op().MINIMUM,
             [
-                operand_inputs[0].name,
-                operand_inputs[1].name,
+                inputs[0].name,
+                inputs[1].name,
             ],
-            [min_output.name],
+            [output.name],
             attr_minimum,
         )
-
-        if output.dtype == ts.DType.INT8:
-            # insert RESCALE from int32 back to int8
-            tqutils.insert_rescale_op_to_int8(
-                tosa_graph, min_output, scale_back, node, self.tosa_spec
-            )
diff --git a/backends/arm/test/passes/test_insert_rescale_i32_pass.py b/backends/arm/test/passes/test_insert_rescale_i32_pass.py
@@ -22,9 +22,10 @@ def __init__(self):
         super().__init__()
 
     def forward(self, x, y):
-        a = torch.abs(x)
-        b = a > y
-        return b
+        a = torch.maximum(x, y)
+        b = torch.abs(a)
+        c = a > b
+        return c
 
     def get_inputs(self, dtype) -> input_t:
         if dtype == torch.float32:
@@ -44,8 +45,8 @@ def test_insert_rescales():
     ops_not_before = {"executorch_exir_dialects_backend__ops_tosa_RESCALE_default"}
     ops_after = {
         # "number of op nodes with i8 output" + "number of i8 node inputs"
-        "executorch_exir_dialects_backend__ops_tosa_RESCALE_default": 1
-        + 3,
+        "executorch_exir_dialects_backend__ops_tosa_RESCALE_default": 2
+        + 5,
     }
     pipeline = PassPipeline[input_t](
         module,