Arm backend: Make SupportedTOSAOperatorChecks work for INT+FP (#16072)

martinlsm · Martin Lindström · web-flow · commit de5962d1cd48 · 2025-12-04T12:44:19.000+01:00
cc @freddan80 @per @zingo @oscarandersson8218 @digantdesai Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com> Co-authored-by: Martin Lindström <Martin.Lindstroem@arm.com>
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
@@ -112,6 +112,7 @@
 
 from executorch.backends.arm._passes.arm_pass import ArmPass
 from executorch.backends.arm.tosa.specification import (
+    tosa_spec_in_set,
     TosaLoweringContext,
     TosaSpecification,
 )
@@ -308,16 +309,20 @@ def transform_to_backend_pipeline(
         self, exported_program: ExportedProgram, graph_module: GraphModule
     ):
         """Apply passes before transforming program to backend"""
-        if self.tosa_spec in (
-            TosaSpecification.create_from_string("TOSA-1.0+FP"),
-            TosaSpecification.create_from_string("TOSA-1.0+INT"),
+
+        if not tosa_spec_in_set(
+            self.tosa_spec,
+            {
+                TosaSpecification.create_from_string("TOSA-1.0+FP"),
+                TosaSpecification.create_from_string("TOSA-1.0+INT"),
+            },
         ):
-            return self._tosa_pipeline(exported_program, graph_module)
-        else:
-            raise NotImplementedError(
-                f"No pass pipeline implemented for {self.tosa_spec}"
+            raise RuntimeError(
+                f"No pass pipeline found for TOSA specification: {self.tosa_spec}"
             )
 
+        return self._tosa_pipeline(exported_program, graph_module)
+
     def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         # Preprocessing passes
         self.add_pass(RemoveGraphAssertsPass())
diff --git a/backends/arm/operator_support/slice_copy_support.py b/backends/arm/operator_support/slice_copy_support.py
@@ -41,9 +41,6 @@ def is_node_tosa_supported(
         non-unit step sizes.
 
         """
-        if tosa_spec not in self.tosa_specs:
-            return False
-
         args = node.args
         if len(args) == 5 and (step := args[4]) != 1:
             logger.warning(f"{node.target} with step size of {step} not supported.")
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
@@ -146,6 +146,61 @@ def register_tosa_support_check(checker: Type[SupportedTOSAOperatorCheck]):
     return checker
 
 
+def _is_quantized_constant(node: torch.fx.Node) -> bool:
+    """Check for a constant op used only by Q_OPS or integer dim-order copies."""
+    constant_targets = (
+        exir_ops.edge.aten.full_like.default,
+        *ComputeConstantOpsAOTPass.targeted_ops,
+    )
+    consumers = tuple(node.users)
+    # Other targets, and constants without any user, never qualify.
+    if node.target not in constant_targets or not consumers:
+        return False
+    if all(consumer.target in Q_OPS for consumer in consumers):
+        return True
+    # Otherwise every user must be a dim-order copy with a non-float dtype.
+    to_dim_order_copy = exir_ops.edge.dim_order_ops._to_dim_order_copy.default
+    for consumer in consumers:
+        if consumer.target != to_dim_order_copy:
+            return False
+        copy_dtype = get_first_fake_tensor(consumer).dtype
+        if copy_dtype.is_complex or copy_dtype.is_floating_point:
+            return False
+    return True
+
+
+def is_quantized(node: torch.fx.Node) -> bool:
+    """Decide whether ``node`` should be treated as quantized.
+
+    The checks below run in order and the first one that applies decides:
+    - The dtype of the node's first fake tensor is neither floating point nor
+      complex, i.e. it is integer-like => quantized.
+    - ``_is_quantized_constant`` accepts the node => quantized.
+    - The node's ``custom`` meta has an ``ArmAnnotationInfo.CUSTOM_META_KEY``
+      entry => the ``"quantized"`` value of that entry is returned.
+    If none of them applies, the node is not quantized.
+
+    Args:
+        node (torch.fx.Node): The FX node to check.
+
+    Returns:
+        bool: True if the node is quantized, False otherwise.
+    """
+
+    dtype = get_first_fake_tensor(node).dtype
+    integer_like = not (dtype.is_complex or dtype.is_floating_point)
+
+    # Integer-like outputs and quantized constants need no explicit annotation.
+    if integer_like or _is_quantized_constant(node):
+        return True
+
+    # Otherwise the annotation stored in the node's custom meta decides, when
+    # one is present.
+    custom = node.meta.get("custom", {})
+    key = ArmAnnotationInfo.CUSTOM_META_KEY
+    return custom[key]["quantized"] if key in custom else False
+
+
 def get_registered_tosa_support_checks(
     tosa_spec: TosaSpecification,
 ) -> list[Type[SupportedTOSAOperatorCheck]]:
@@ -194,9 +249,11 @@ def tosa_support_factory(
         ControlFlowOpSupported(exported_program, tosa_spec, reporter),
     ]
 
-    if tosa_spec.support_integer():
+    if tosa_spec.support_integer() and tosa_spec.support_float():
+        positive_checks.append(TOSAProINTFPSupportList())
+    elif tosa_spec.support_integer():
         positive_checks.append(TOSAProINTSupportList())
-    if tosa_spec.support_float():
+    elif tosa_spec.support_float():
         positive_checks.append(TOSAProFPSupportList())
     # TODO: Refactor to use TOSAProSupportLists + negtive checks
     positive_checks += [
@@ -268,6 +325,27 @@ def is_node_supported(
         return node.op == "call_function" and node.target in TOSA_PRO_FP_SupportList
 
 
+class TOSAProINTFPSupportList(OperatorSupportBase):
+    """Operator support check for the INT+FP profile.
+
+    A quantized node, or one targeting an op in ``Q_OPS``/``DQ_OPS``, is looked
+    up in ``TOSA_PRO_INT_SupportList``; every other node is looked up in
+    ``TOSA_PRO_FP_SupportList``.
+    """
+
+    def is_node_supported(
+        self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
+    ) -> bool:
+        """Return True if ``node`` is a ``call_function`` in the matching list."""
+        if node.op != "call_function":
+            return False
+
+        # Quantized nodes and Q/DQ ops follow the INT list, the rest the FP one.
+        use_int = is_quantized(node) or node.target in (*Q_OPS, *DQ_OPS)
+        support_list = TOSA_PRO_INT_SupportList if use_int else TOSA_PRO_FP_SupportList
+        return node.target in support_list
+
+
 class CheckArmQuantized(OperatorSupportBase):
     """
     Check if the node was marked as quantized in the Arm backend.
@@ -278,60 +356,14 @@ class CheckArmQuantized(OperatorSupportBase):
     def __init__(self, reporter: WhyNoPartitionReporter):
         self.reporter = reporter
 
-    def _is_quantized(self, node: torch.fx.Node) -> bool:
-        """Checks if the node is quantized.
-
-        A node is considered quantized if at least one criteria is met:
-        - Its dtype is not floating point or complex => integer
-        - It is one of the special cases where the node has been created in to_edge, e.g.
-          .Scalar operations that have been promoted .Tensor operations
-          where the scalar is replaced by a full op.
-        - It has been marked as quantized in the ArmAnnotationInfo custom meta.
-
-        Args:
-            node (torch.fx.Node): The FX node to check.
-
-        Returns:
-            bool: True if the node is quantized, False otherwise.
-        """
-        node_dtype = get_first_fake_tensor(node).dtype
-        if not node_dtype.is_complex and not node_dtype.is_floating_point:
-            return True
-        if node.target in (
-            exir_ops.edge.aten.full_like.default,
-            *ComputeConstantOpsAOTPass.targeted_ops,
-        ):
-            # Special cases where nodes have been created in to_edge, e.g.
-            # .Scalar operations that have been promoted .Tensor operations
-            # where the scalar is replaced by a full op.
-            if all(user.target in Q_OPS for user in node.users):
-                return True
-            for user in node.users:
-                if (
-                    user.target
-                    == exir_ops.edge.dim_order_ops._to_dim_order_copy.default
-                ):
-                    dim_order_dtype = get_first_fake_tensor(user).dtype
-                    if dim_order_dtype.is_complex or dim_order_dtype.is_floating_point:
-                        return False
-                else:
-                    return False
-            return True
-        return (
-            ArmAnnotationInfo.CUSTOM_META_KEY in node.meta.get("custom", {})
-            and ArmAnnotationInfo(
-                node.meta["custom"][ArmAnnotationInfo.CUSTOM_META_KEY]
-            ).quantized
-        )
-
     def is_node_supported(
         self, submodules: typing.Mapping[str, torch.nn.Module], node: fx.Node
     ) -> bool:
 
         if node.target in (*DQ_OPS, *Q_OPS):
             return True
 
-        if not self._is_quantized(node):
+        if not is_quantized(node):
             self.reporter.report_reject(
                 node, "Node was not marked as quantized in the Arm backend."
             )
diff --git a/backends/arm/test/misc/test_quant_custom_meta.py b/backends/arm/test/misc/test_quant_custom_meta.py
@@ -3,6 +3,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import pytest
 import torch
 from executorch.backends.arm.quantizer import (
     get_symmetric_quantization_config,
@@ -31,31 +32,41 @@ def get_selective_quantizer(modules):
     return Quantize(quantizer, get_symmetric_quantization_config())
 
 
-def test_qdq_squeezed_fp_op():
+@pytest.mark.parametrize("fp_extension", [True, False])
+def test_qdq_squeezed_fp_op(fp_extension: bool):
     """Test that a float operation surrounded by quantize-dequantize pairs
     is correctly handled by the partitioner and the TOSA backend.
     Pattern:
     q -> dq -> add -> q -> dq -> sigmoid -> q -> dq -> mul -> dq -> q
-                        |_____Non-delegated____|
+                        |_____unquantized_____|
     """
     aten_op = "torch.ops.aten.add.Tensor"
     exir_op = "executorch_exir_dialects_edge__ops_aten_add_Tensor"
     module = AddSigmoidMul()
     x = torch.randn(2, 3, 4)
     y = torch.randn(2, 3, 4)
     pipeline = TosaPipelineINT(
-        module=module, test_data=(x, y), aten_op=aten_op, exir_op=exir_op
+        module=module,
+        test_data=(x, y),
+        aten_op=aten_op,
+        exir_op=exir_op,
+        tosa_extensions=["FP"] if fp_extension else None,
     )
     pipeline.change_args("quantize", get_selective_quantizer([torch.nn.Sigmoid]))
-    pipeline.change_args(
-        "check_count.exir",
-        {
-            "torch.ops.higher_order.executorch_call_delegate": 2,
-            "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 1,
-            "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 2,
-            "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 3,
-        },
-    )
+
+    if not fp_extension:
+        # In case we don't have the FP extension, the unquantized part of the
+        # graph should not be delegated to the Arm backend. Modify the op count
+        # checks to reflect this behavior.
+        pipeline.change_args(
+            "check_count.exir",
+            {
+                "torch.ops.higher_order.executorch_call_delegate": 2,
+                "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 1,
+                "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 2,
+                "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 3,
+            },
+        )
     pipeline.run()
 
 
@@ -69,32 +80,41 @@ def forward(self, x, y):
         return self.conv(self.sigmoid(x + y * x))
 
 
-def test_quantized_to_float_transition():
+@pytest.mark.parametrize("fp_extension", [True, False])
+def test_quantized_to_float_transition(fp_extension: bool):
     """Test that a model executing quantized ops followed by float ops
     is correctly handled by the partitioner and the TOSA backend.
     Pattern:
     q -> dq -> mul -> q -> dq -> add -> q -> dq -> sigmoid -> conv
-                                           |____Non-delegated___|
+                                           |___unquantized___|
     """
     aten_op = "torch.ops.aten.add.Tensor"
     exir_op = "executorch_exir_dialects_edge__ops_aten_add_Tensor"
     module = MulAddSigmoidConv()
     x = torch.randn(2, 3, 4)
     y = torch.randn(2, 3, 4)
     pipeline = TosaPipelineINT(
-        module=module, test_data=(x, y), aten_op=aten_op, exir_op=exir_op
+        module=module,
+        test_data=(x, y),
+        aten_op=aten_op,
+        exir_op=exir_op,
+        tosa_extensions=["FP"] if fp_extension else None,
     )
+    if not fp_extension:
+        # In case we don't have the FP extension, the unquantized part of the
+        # graph should not be delegated to the Arm backend. Modify the op count
+        # checks to reflect this behavior.
+        pipeline.change_args(
+            "check_count.exir",
+            {
+                "torch.ops.higher_order.executorch_call_delegate": 1,
+                "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 1,
+                "executorch_exir_dialects_edge__ops_aten_convolution_default": 1,
+                "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1,
+                "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
+            },
+        )
     pipeline.change_args(
         "quantize", get_selective_quantizer([torch.nn.Sigmoid, torch.nn.Conv1d])
     )
-    pipeline.change_args(
-        "check_count.exir",
-        {
-            "torch.ops.higher_order.executorch_call_delegate": 1,
-            "executorch_exir_dialects_edge__ops_aten_sigmoid_default": 1,
-            "executorch_exir_dialects_edge__ops_aten_convolution_default": 1,
-            "executorch_exir_dialects_edge__ops_quantized_decomposed_dequantize_per_tensor_default": 1,
-            "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default": 2,
-        },
-    )
     pipeline.run()
diff --git a/backends/arm/tosa/specification.py b/backends/arm/tosa/specification.py
@@ -386,3 +386,21 @@ def get_context_spec() -> TosaSpecification:
         return TosaLoweringContext.tosa_spec_var.get()
     except LookupError:
         raise RuntimeError("Function must be executed within a TosaLoweringContext")
+
+
+def tosa_spec_in_set(spec: TosaSpecification, specs: Set[TosaSpecification]) -> bool:
+    """Report whether any base specification of ``spec`` is a member of ``specs``.
+
+    ``spec`` is expanded with ``TosaSpecMapping._get_base_specs`` and each
+    resulting base specification is tested for membership in ``specs``.
+
+    Args:
+        spec (TosaSpecification): Specification to check.
+        specs (Set[TosaSpecification]): Specifications to match against.
+
+    Returns:
+        bool: True if at least one base specification is in ``specs``,
+            False otherwise (including when there are no base specifications).
+
+    """
+    return any(base in specs for base in TosaSpecMapping._get_base_specs(spec))