Commit c9ae6d4

Github Executorch committed
Summary: MV2 CortexM PassManager changes for Alif E8

Test Plan:

python3 -m examples.arm.aot_arm_compiler -m mv2 --target=cortex-m --quantize --enable_qdq_fusion_pass --intermediates=./mv2_intermediates --output=./mv2_cortex_m.pte
cat ./mv2_intermediates/delegation_info.txt

Delegation info:
Total delegated subgraphs: 0
Number of delegated nodes: 0
Number of non-delegated nodes: 72

Delegation table:
╒════╤═════════════════════════════════════════════╤═══════════════════════════════════╤═══════════════════════════════════════╕
│    │ op_type                                     │ occurrences_in_delegated_graphs   │ occurrences_in_non_delegated_graphs   │
╞════╪═════════════════════════════════════════════╪═══════════════════════════════════╪═══════════════════════════════════════╡
│  0 │ aten_as_strided_copy_default                │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  1 │ aten_mean_dim                               │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  2 │ aten_view_copy_default                      │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  3 │ cortex_m_dequantize_per_tensor_default      │ 0                                 │ 2                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  4 │ cortex_m_quantize_per_tensor_default        │ 0                                 │ 2                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  5 │ cortex_m_quantized_add_default              │ 0                                 │ 10                                    │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  6 │ cortex_m_quantized_conv2d_default           │ 0                                 │ 35                                    │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  7 │ cortex_m_quantized_depthwise_conv2d_default │ 0                                 │ 17                                    │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  8 │ cortex_m_quantized_linear_default           │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│  9 │ cortex_m_transpose_default                  │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│ 10 │ dim_order_ops__clone_dim_order_default      │ 0                                 │ 1                                     │
├────┼─────────────────────────────────────────────┼───────────────────────────────────┼───────────────────────────────────────┤
│ 11 │ Total                                       │ 0                                 │ 72                                    │
╘════╧═════════════════════════════════════════════╧═══════════════════════════════════╧═══════════════════════════════════════╛

Reviewers:

Subscribers:

Tasks:

Tags:
1 parent ec4c462 commit c9ae6d4

5 files changed: +261 -14 lines changed

backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py

Lines changed: 65 additions & 0 deletions

@@ -33,6 +33,15 @@
 from torch.fx import GraphModule, Node


+# Passthrough ops that preserve quantization parameters from input to output.
+# These ops should be foldable even without explicit annotation metadata.
+PASSTHROUGH_OPS = {
+    exir_ops.edge.aten.hardtanh.default,
+    exir_ops.edge.aten.relu.default,
+    exir_ops.edge.aten.clamp.default,
+}
+
+
 def _get_special_dtype(qspec: QuantArgs) -> TosaSpecialDtype | None:
     if qspec.dtype == torch.int8:
         if qspec.qmax == 7 and qspec.qmin == -7:
@@ -248,6 +257,26 @@ def _handle_control_flow_node(self, node: Node, graph_module: GraphModule):
             submodule.graph.erase_node(node_to_remove)
         return

+    @staticmethod
+    def _has_dq_input_and_q_output(node: Node) -> bool:
+        """
+        Check if a node has dequantize input(s) and quantize output(s).
+        This indicates the node is part of a quantized computation path.
+        """
+        # Check if any input is from a dequantize op
+        has_dq_input = any(
+            isinstance(arg, Node) and arg.target in DQ_OPS
+            for arg in node.args
+            if isinstance(arg, Node)
+        )
+
+        # Check if any output goes to a quantize op
+        has_q_output = any(
+            user.target in Q_OPS
+            for user in node.users
+        )
+        return has_dq_input and has_q_output
+
     @staticmethod
     def is_foldable(node: Node) -> bool:
         if node.op != "call_function":
@@ -263,6 +292,13 @@ def is_foldable(node: Node) -> bool:
         ):
             return True

+        # Passthrough ops (hardtanh, relu, clamp) that have dq inputs and q outputs
+        # should be foldable even without explicit annotation. These ops preserve
+        # quantization parameters and are common in quantized models like MobileNetV2.
+        if node.target in PASSTHROUGH_OPS:
+            if FoldAndAnnotateQParamsPass._has_dq_input_and_q_output(node):
+                return True
+
         # We should not fold q-dq nodes into non-quantized nodes.
         if not (
             ArmAnnotationInfo.CUSTOM_META_KEY in node.meta.get("custom", {})
@@ -335,6 +371,35 @@ def call(self, graph_module: GraphModule) -> PassResult:  # noqa: C901
         ):
             self._handle_control_flow_node(n, graph_module)

+        # Second pass: propagate qparams through passthrough ops.
+        # For ops like hardtanh that share qparams with their input, we need to:
+        # 1. Copy output_qparams from the passthrough op to its input node
+        # 2. Set input_qparams on the passthrough op
+        for n in graph_module.graph.nodes:
+            n = cast(Node, n)
+            if n.target not in PASSTHROUGH_OPS:
+                continue
+
+            # Check if this passthrough op has output_qparams but missing input_qparams
+            has_output = "output_qparams" in n.meta and len(n.meta.get("output_qparams", {})) > 0
+            has_input = "input_qparams" in n.meta and len(n.meta.get("input_qparams", {})) > 0
+
+            if not has_output or has_input:
+                continue
+
+            # Get the input node
+            if len(n.args) == 0 or not isinstance(n.args[0], Node):
+                continue
+
+            input_node = n.args[0]
+
+            # Propagate: for passthrough ops, output qparams equal input qparams
+            if "output_qparams" not in input_node.meta:
+                input_node.meta["output_qparams"] = n.meta["output_qparams"]
+
+            # Set input_qparams from output_qparams (same for passthrough ops)
+            n.meta["input_qparams"] = {0: n.meta["output_qparams"][0]}
+
         # retrace the graph to update the fake tensor types
         graph_module = super().call(graph_module).graph_module
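
For reference, a standalone sketch of what this second pass does, written against plain torch.fx. The QuantArgs namedtuple and the qparam values below are illustrative stand-ins, not the Arm backend's classes, and the sketch omits the actual q/dq folding:

from collections import namedtuple

import torch
import torch.fx

# Illustrative stand-in for the backend's QuantArgs; values are made up.
QuantArgs = namedtuple("QuantArgs", ["scale", "zp", "qmin", "qmax", "dtype"])

PASSTHROUGH = {torch.relu}

class M(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x + 1.0)

gm = torch.fx.symbolic_trace(M())

# Pretend annotation left the relu node with output qparams only.
for n in gm.graph.nodes:
    if n.target in PASSTHROUGH:
        n.meta["output_qparams"] = {0: QuantArgs(0.02, 0, -128, 127, torch.int8)}

# Mirror of the second pass: push the qparams onto the producer node and
# record them as the passthrough op's own input qparams.
for n in gm.graph.nodes:
    if n.target not in PASSTHROUGH or not n.meta.get("output_qparams"):
        continue
    if n.meta.get("input_qparams") or not isinstance(n.args[0], torch.fx.Node):
        continue
    producer = n.args[0]
    producer.meta.setdefault("output_qparams", n.meta["output_qparams"])
    n.meta["input_qparams"] = {0: n.meta["output_qparams"][0]}

relu = next(n for n in gm.graph.nodes if n.target is torch.relu)
print(relu.meta["input_qparams"], relu.args[0].meta["output_qparams"])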

backends/cortex_m/passes/convert_to_cortex_m_pass.py

Lines changed: 44 additions & 0 deletions

@@ -69,6 +69,45 @@ def _get_batch_size_from_conv(self, conv_node: torch.fx.Node):
             pass
         return None

+    def _get_addmm_replacement(self, node):
+        """
+        Handle aten.addmm which has signature: addmm(bias, input, weight)
+        This is the decomposed form of aten.linear.
+        """
+        bias = node.args[0]
+        input_tensor = node.args[1]
+        weights = node.args[2]
+
+        if "input_qparams" not in node.meta or "output_qparams" not in node.meta:
+            return None
+
+        input_qp = node.meta["input_qparams"].get(0)
+        weight_qp = node.meta["input_qparams"].get(1)
+        output_qp = node.meta["output_qparams"].get(0)
+
+        if not input_qp or not weight_qp or not output_qp:
+            return None
+
+        quantized_multiplier, quantized_shift = quantize_multiplier_aot(
+            (input_qp.scale * weight_qp.scale) / output_qp.scale
+        )
+
+        args = (
+            input_tensor,
+            weights,
+            bias,
+            None,
+            input_qp.zp,
+            weight_qp.zp,
+            output_qp.zp,
+            [quantized_multiplier],
+            [quantized_shift],
+            input_qp.qmax,
+            input_qp.qmin,
+        )
+
+        return exir_ops.edge.cortex_m.quantized_linear.default, args
+
     def _get_linear_replacement(self, node):
         """
         Let
@@ -386,6 +425,11 @@ def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
             match node.target:
                 case exir_ops.edge.aten.linear.default:
                     op, args = self._get_linear_replacement(node)
+                case exir_ops.edge.aten.addmm.default:
+                    result = self._get_addmm_replacement(node)
+                    if result is None:
+                        continue
+                    op, args = result
                 case exir_ops.edge.aten.convolution.default:
                     # Check if it's transposed convolution (arg index 6)
                     transposed = node.args[6] if len(node.args) > 6 else False
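
The requantization scale (input_scale * weight_scale / output_scale) is folded ahead of time into an integer multiplier and shift, which is what quantize_multiplier_aot produces. Below is a hedged sketch of the standard CMSIS-NN-style decomposition such a helper performs, not necessarily this backend's exact implementation:

import math

def quantize_multiplier(real_multiplier: float):
    """Split real_multiplier into (multiplier, shift) such that
    real_multiplier ~= multiplier * 2**(shift - 31), with multiplier int32."""
    if real_multiplier == 0.0:
        return 0, 0
    mantissa, exponent = math.frexp(real_multiplier)  # mantissa in [0.5, 1)
    multiplier = round(mantissa * (1 << 31))
    if multiplier == (1 << 31):  # rounding pushed the mantissa to 1.0
        multiplier //= 2
        exponent += 1
    return multiplier, exponent

# Example: input scale 0.05, weight scale 0.01, output scale 0.1.
m, s = quantize_multiplier(0.05 * 0.01 / 0.1)
print(m, s)  # 1374389535 -7, since 1374389535 * 2**(-7 - 31) ~= 0.005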

backends/cortex_m/passes/cortex_m_pass_manager.py

Lines changed: 16 additions & 2 deletions

@@ -13,6 +13,9 @@
 from executorch.backends.transforms.replace_scalar_with_tensor import (
     ReplaceScalarWithTensorArgPass,
 )
+from executorch.backends.arm._passes.decompose_adaptive_avg_pool2d_pass import (
+    DecomposeAdaptiveAvgPool2dPass,
+)
 from executorch.exir.pass_base import ExportPass
 from executorch.exir.pass_manager import PassManager
 from executorch.exir.program._program import _transform
@@ -33,6 +36,7 @@ class CortexMPassManager(PassManager):
         ReplaceScalarWithTensorArgPass,
         ReplaceQuantNodesPass,
         ActivationFusionPass,
+        DecomposeAdaptiveAvgPool2dPass,
         DecomposeHardswishPass,
         QuantizedOpFusionPass,
         ConvertToCortexMPass,
@@ -44,12 +48,22 @@ class CortexMPassManager(PassManager):
         ClampHardswishPass,
     ]

-    def __init__(self, exported_program, passes=None):
+    def __init__(self, exported_program, passes=None, skip_passes=None):
+        """
+        Initialize CortexMPassManager.
+
+        Args:
+            exported_program: The ExportedProgram to transform.
+            passes: Optional custom pass list. Uses default pass_list if None.
+            skip_passes: Optional list of pass classes to skip.
+        """
         self.exported_program = exported_program
         if passes is not None:
             self.passes = passes
         else:
-            self.passes = self.pass_list
+            self.passes = list(self.pass_list)
+        if skip_passes:
+            self.passes = [p for p in self.passes if p not in skip_passes]

     def transform_for_annotation(self, model):
         passes = self.pass_list_transform_for_annotation
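
A minimal usage sketch of the new skip_passes hook. Here ep stands in for an already-exported program; how the manager is subsequently run depends on the surrounding flow. Note that copying via list(self.pass_list) means filtering mutates only the instance's list, never the shared class attribute:

from executorch.backends.arm._passes.decompose_adaptive_avg_pool2d_pass import (
    DecomposeAdaptiveAvgPool2dPass,
)
from executorch.backends.cortex_m.passes.cortex_m_pass_manager import (
    CortexMPassManager,
)

# ep: an ExportedProgram produced earlier (e.g. via torch.export / to_edge).
pm = CortexMPassManager(ep, skip_passes=[DecomposeAdaptiveAvgPool2dPass])

assert DecomposeAdaptiveAvgPool2dPass not in pm.passes  # filtered instance copy
assert DecomposeAdaptiveAvgPool2dPass in CortexMPassManager.pass_list  # class default intact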

backends/cortex_m/quantizer/quantizer.py

Lines changed: 5 additions & 0 deletions

@@ -448,6 +448,11 @@ class SharedQspecQuantizer(Quantizer):
         torch.ops.aten._unsafe_view.default,
         torch.ops.aten.unflatten.int,
         torch.ops.aten.flatten.using_ints,
+        # Additional passthrough ops for MobileNetV2 and similar architectures
+        torch.ops.aten.hardtanh.default,
+        torch.ops.aten.hardtanh_.default,
+        torch.ops.aten.max_pool2d.default,
+        torch.ops.aten.dropout.default,
     ]

     def __init__(self, targets: Optional[List[OpOverload]] = None) -> None:
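
hardtanh can share its input's quantization spec because clamping commutes with affine quantization when the clamp bounds land on representable values; hardtanh(0, 6) is the ReLU6 used throughout MobileNetV2. A small numeric check of that claim (the scale and zero point here are made-up illustration values):

import torch

scale, zp = 6.0 / 255.0, -128  # int8 qparams covering [0, 6]

x = torch.linspace(-2.0, 8.0, steps=11)

# Path A: hardtanh in float, then quantize.
a = torch.clamp(
    torch.round(torch.nn.functional.hardtanh(x, 0.0, 6.0) / scale) + zp, -128, 127
)

# Path B: quantize with the same qparams, then clamp in the integer domain.
q = torch.clamp(torch.round(x / scale) + zp, -128, 127)
b = torch.clamp(q, zp, zp + round(6.0 / scale))

print(torch.equal(a, b))  # True: identical integer values, so qparams pass through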
