13 | 13 | from executorch.backends.nxp.quantizer.utils import get_bias_qparams |
14 | 14 | from torch import fx |
15 | 15 | from torch._ops import OpOverload |
| 16 | +from torch.fx import Node |
16 | 17 | from torchao.quantization.pt2e import PerChannelMinMaxObserver |
17 | 18 | from torchao.quantization.pt2e.quantizer import ( |
18 | 19 | DerivedQuantizationSpec, |
19 | 20 | FixedQParamsQuantizationSpec, |
20 | 21 | QuantizationSpec, |
21 | 22 | SharedQuantizationSpec, |
22 | 23 | ) |
| 24 | + |
23 | 25 | from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY |
24 | 26 |
25 | 27 |
@@ -199,7 +201,6 @@ def partition_types(self) -> list[OpOverload]: |
199 | 201 | def get_anchors( |
200 | 202 | self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] |
201 | 203 | ) -> PartitionAnchors: |
202 | | - # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge... |
203 | 204 | addmm_node = fused_partition[0].nodes[-1] |
204 | 205 |
205 | 206 | bias_qspec = DerivedQuantizationSpec( |
@@ -745,3 +746,147 @@ def get_anchors( |
745 | 746 | return get_anchors_for_fixed_quant_specs( |
746 | 747 | fused_partition, scale=1.0 / 128.0, zero_point=0 |
747 | 748 | ) |
| 749 | + |
| 750 | + |
| 751 | +class ActivationsConcatClusterPattern(QuantizationPattern): |
| 752 | + """ |
| 752 | + Quantizer for the activations-concat cluster pattern.
| 754 | +
| 755 | + The quantizer matches a pattern where a concat node is preceded by activation nodes, which are in turn
| 756 | + preceded by Conv 2D or Linear nodes. The quantization parameters of all activation nodes must be identical,
| 757 | + and only activations that Neutron can fuse into the preceding compute node are allowed. This cluster is
| 758 | + usually produced by the MoveActivationBeforeConcat pass. Cluster schema:
| 759 | +
| 760 | + │ │ |
| 761 | + ┌──────▼──────┐ ┌──────▼──────┐ |
| 762 | + │ aten.conv2d │ ... │ aten.conv2d │ |
| 763 | + └──────┬──────┘ └──────┬──────┘ |
| 764 | + │ │ |
| 765 | + ┌─────▼─────┐ ┌─────▼─────┐ |
| 766 | + │ aten.relu │ ... │ aten.relu │ |
| 767 | + └─────┬─────┘ └─────┬─────┘ |
| 768 | + └───────┐ ┌───────┘ |
| 769 | + ┌──▼─────▼─┐ |
| 770 | + │ aten.cat │ |
| 771 | + └────┬─────┘ |
| 772 | + │ |
| 773 | + """ |
| 774 | + |
| 775 | + def __init__(self, neutron_quantizer): |
| 776 | + self.neutron_quantizer = neutron_quantizer |
| 777 | + self.neutron_target_info = ( |
| 778 | + self.neutron_quantizer.neutron_target_spec.neutron_target_info |
| 779 | + ) |
| 780 | + |
| 781 | + @staticmethod |
| 782 | + def _all_activations_are_equal(activations: list[Node]) -> bool: |
| 783 | + first_input_node = activations[0] |
| 784 | + hardtanh_t = [ |
| 785 | + torch.ops.aten.hardtanh.default, |
| 786 | + torch.ops.aten.hardtanh_.default, |
| 787 | + ] |
| 788 | + relu_t = [ |
| 789 | + torch.ops.aten.relu.default, |
| 790 | + torch.ops.aten.relu_.default, |
| 791 | + ] |
| 792 | + tanh_t = [ |
| 793 | + torch.ops.aten.tanh.default, |
| 794 | + torch.ops.aten.tanh_.default, |
| 795 | + ] |
| 796 | + |
| 797 | + def _activations_are_equal(activation1: Node, activation2: Node) -> bool: |
| 798 | + if ( # Targets match, including their in-place variants
| 799 | + activation1.target in hardtanh_t |
| 800 | + and activation2.target in hardtanh_t |
| 801 | + or activation1.target in relu_t |
| 802 | + and activation2.target in relu_t |
| 803 | + or activation1.target in tanh_t |
| 804 | + and activation2.target in tanh_t |
| 805 | + or activation1.target == torch.ops.aten.sigmoid.default |
| 806 | + and activation2.target == torch.ops.aten.sigmoid.default |
| 807 | + ): |
| 808 | + return True |
| 809 | + elif ( # Hardtanh with min_val=0.0 and max_val=inf is equivalent to Relu
| 810 | + activation1.target in hardtanh_t |
| 811 | + and activation1.args[1:] == (0.0, float("inf")) |
| 812 | + and activation2.target in relu_t |
| 813 | + or activation1.target in relu_t |
| 814 | + and activation2.target in hardtanh_t |
| 815 | + and activation2.args[1:] == (0.0, float("inf")) |
| 816 | + ): |
| 817 | + return True |
| 818 | + else: |
| 819 | + return False |
| 820 | + |
| 821 | + return all( |
| 822 | + _activations_are_equal(activation, first_input_node) |
| 823 | + for activation in activations |
| 824 | + ) |
| 825 | + |
| 826 | + def partition_types(self) -> list[OpOverload]: |
| 827 | + return [torch.ops.aten.cat.default] |
| 828 | + |
| 829 | + def get_anchors( |
| 830 | + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] |
| 831 | + ) -> PartitionAnchors | None: |
| 832 | + cat_node = fused_partition[0].nodes[-1] |
| 833 | + |
| 834 | + # Check all cat inputs are supported activations |
| 835 | + if not all( |
| 836 | + self.neutron_target_info.is_supported_fused_activation__aten(input_node) |
| 837 | + for input_node in cat_node.all_input_nodes |
| 838 | + ): |
| 839 | + return None |
| 840 | + |
| 841 | + # Check all cat inputs are equal activations |
| 842 | + if not self._all_activations_are_equal(cat_node.all_input_nodes): |
| 843 | + return None |
| 844 | + |
| 845 | + # Check compute nodes are Conv 2D or Linear |
| 846 | + if not all( |
| 847 | + self.neutron_target_info.is_fusable_conv_or_linear__aten(compute_node) |
| 848 | + for input_node in cat_node.all_input_nodes |
| 849 | + for compute_node in input_node.all_input_nodes |
| 850 | + ): |
| 851 | + return None |
| 852 | + |
| 853 | + # Annotate compute nodes |
| 854 | + for input_node in cat_node.all_input_nodes: |
| 855 | + for compute_node in input_node.all_input_nodes: |
| 856 | + if compute_node.target not in self.neutron_quantizer.op_to_quantizer: |
| 857 | + return None |
| 858 | + compute_node_quantizer = self.neutron_quantizer.op_to_quantizer[ |
| 859 | + compute_node.target |
| 860 | + ] |
| 861 | + compute_node_quantizer.annotate(gm) |
| 862 | + del compute_node.meta[Q_ANNOTATION_KEY].output_qspec # Output feeds the fused activation, so it carries no qspec
| 863 | + |
| 864 | + # Annotate activations |
| 865 | + for input_node in cat_node.all_input_nodes: |
| 866 | + if input_node.target not in self.neutron_quantizer.op_to_quantizer: |
| 867 | + return None |
| 868 | + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ |
| 869 | + input_node.target |
| 870 | + ] |
| 871 | + activation_quantizer.annotate(gm) |
| 872 | + input_node.meta[Q_ANNOTATION_KEY].input_qspec_map = {} # Input edge is fused with the preceding compute node
| 873 | + |
| 874 | + # Annotate cat node |
| 875 | + inputs = [] |
| 876 | + first_input_node = cat_node.all_input_nodes[0] |
| 877 | + for idx in range(len(cat_node.all_input_nodes)): |
| 878 | + inputs.append( |
| 879 | + ( |
| 880 | + cat_node, |
| 881 | + NodeArgsIdx(0, idx), |
| 882 | + SharedQuantizationSpec(first_input_node), |
| 883 | + ) |
| 884 | + ) |
| 885 | + outputs = [(cat_node, SharedQuantizationSpec(first_input_node))] |
| 886 | + |
| 887 | + return PartitionAnchors( |
| 888 | + inputs=inputs, |
| 889 | + weights=[], |
| 890 | + biases=[], |
| 891 | + output=outputs, |
| 892 | + ) |
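
For reference, a minimal eager-mode module whose exported graph forms the cluster matched above. This is an illustrative sketch, not part of the patch; the module name and layer sizes are made up:

    import torch
    from torch import nn

    class ConcatCluster(nn.Module):
        # Two Conv2d -> ReLU branches feeding a single cat, mirroring the
        # schema drawn in the ActivationsConcatClusterPattern docstring.
        def __init__(self):
            super().__init__()
            self.conv_a = nn.Conv2d(3, 8, kernel_size=3)
            self.conv_b = nn.Conv2d(3, 8, kernel_size=3)

        def forward(self, x):
            a = torch.relu(self.conv_a(x))   # aten.relu preceded by aten.conv2d
            b = torch.relu(self.conv_b(x))   # same activation type on every branch
            return torch.cat([a, b], dim=1)  # aten.cat consuming only the activations

Once such a graph is annotated, get_anchors ties every cat input and the cat output to SharedQuantizationSpec(first_input_node), so all branches and the concatenated result end up with a single set of quantization parameters.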