From 07a116c456542f3e3c33b42bb97ec1153d69909a Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Fri, 12 Sep 2025 11:46:41 +0200 Subject: [PATCH 1/3] Remove Fuse activation functions IR optimization --- .../fuse_activation_functions.py | 235 ------------------ .../backend/ir/tflite_optimizer/optimizer.py | 8 - 2 files changed, 243 deletions(-) delete mode 100755 backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py b/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py deleted file mode 100755 index 6b657c4d5b1..00000000000 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2024 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from executorch.backends.nxp.backend.ir import logger -from executorch.backends.nxp.backend.ir.lib.tflite.ActivationFunctionType import ( - ActivationFunctionType, -) -from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( - BuiltinOperator, -) -from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model -from executorch.backends.nxp.backend.ir.tflite_optimizer.graph_utils import ( - operator_is_type, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.operator_rules import ( - NoFusedActivationFunction, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.base_optimization import ( - BaseOptimization, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.pattern_matcher import ( - Op, - PatternMatcher, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.tensor_rules import ( - TensorHasOneConsumer, -) - - -class FuseActivationFunctions(BaseOptimization): - ops_with_fused_activation_function = [ - "Conv2D", - "Conv3D", - "DepthwiseConv2D", - "TransposeConv", - "MaxPool2D", - "AveragePool2D", - "SVDF", - "FullyConnected", - "Add", - "Mul", - "Sub", - "Div", - # 'Concatenation', # currently disabled - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/concatenation.cc#L139 - # 'L2Norm', # currently disabled - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/l2norm.cc#L72 - # LSTM operators will always already have fused activation functions. They are assigned in `convert_lstm.py`. - # 'LSTM', 'UnidirectionalSequenceLSTM', 'BidirectionalSequenceLSTM' - # RNN operators will always already have fused activation functions. They are assigned in `convert_rnn.py`. 
- # 'RNN', 'SequenceRNN', 'BidirectionalSequenceRNN', - ] - - activation_functions = ["Relu", "ReluN1To1", "Relu6", "Tanh", "Sign"] - - supported_activations_for_op: dict[ - BuiltinOperator, list[ActivationFunctionType] - ] = { - BuiltinOperator.CONV_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/conv.cc#L912 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.CONV_3D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/conv3d.cc#L213 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.DEPTHWISE_CONV_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/depthwise_conv.cc#L307 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.TRANSPOSE_CONV: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/transpose_conv.cc#L516 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.MAX_POOL_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/pooling.cc#L247 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.AVERAGE_POOL_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/pooling.cc#L124 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.FULLY_CONNECTED: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/fully_connected.cc#L627-L630 - BuiltinOperator.ADD: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/add.cc#L246 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.MUL: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/mul.cc#L159 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.SUB: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/sub.cc#L306 - # 
https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.DIV: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/div.cc#L180 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.SVDF: [ActivationFunctionType.RELU], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/svdf.cc#L394 - BuiltinOperator.RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/basic_rnn.cc#L222 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - BuiltinOperator.UNIDIRECTIONAL_SEQUENCE_RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/6887368d6d46223f460358323c4b76d61d1558a8/tensorflow/lite/kernels/unidirectional_sequence_rnn.cc#L239 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - BuiltinOperator.BIDIRECTIONAL_SEQUENCE_RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/6887368d6d46223f460358323c4b76d61d1558a8/tensorflow/lite/kernels/bidirectional_sequence_rnn.cc#L433 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - } - - ops_that_need_equal_io_quantization = [ - # Documented restrictions from https://www.tensorflow.org/lite/performance/quantization_spec - BuiltinOperator.AVERAGE_POOL_2D, - BuiltinOperator.MAX_POOL_2D, - BuiltinOperator.CONCATENATION, - ] - - def _act_fun_type_for_op(self, op: tflite_model.Operator) -> ActivationFunctionType: - if operator_is_type(op, "Relu", self._builder): - return ActivationFunctionType.RELU - elif operator_is_type(op, "ReluN1To1", self._builder): - return ActivationFunctionType.RELU_N1_TO_1 - elif operator_is_type(op, "Relu6", self._builder): - return ActivationFunctionType.RELU6 - elif operator_is_type(op, "Tanh", self._builder): - return ActivationFunctionType.TANH - elif operator_is_type(op, "Sign", self._builder): - return ActivationFunctionType.SIGN_BIT - - def __call__(self) -> bool: - matcher = PatternMatcher( - self._builder, - [ - Op( - self.ops_with_fused_activation_function, - ["x"], - ["x1"], - [NoFusedActivationFunction()], - ), - Op(self.activation_functions, ["x1"], ["y"]), - ], - [TensorHasOneConsumer("x1")], - ) - - to_remove = [] - for [leading_op, act_fun_op], tensor_map, _, _ in matcher.match_patterns(): - builtin_leading_op = leading_op.builtin_options.operator_type - 
logger.internal_assert( - builtin_leading_op in self.supported_activations_for_op.keys(), - f"FuseActivationFunctions: supported activations for operator `{builtin_leading_op}`" - "are not known.", - ) - - act_fun = self._act_fun_type_for_op(act_fun_op) - if act_fun not in self.supported_activations_for_op[builtin_leading_op]: - # The leading op doesn't support this activation function. - continue - - x, y = tensor_map["x"], tensor_map["y"] - if ( - x.quantization != y.quantization - and builtin_leading_op in self.ops_that_need_equal_io_quantization - ): - # The fusion would result in different input and output quantization of `leading_op`, which would cause - # runtime issues for that particular operator. - continue - - leading_op.builtin_options.fused_activation_function = act_fun - leading_op.tmp_outputs[0] = act_fun_op.tmp_outputs[0] - to_remove.append(act_fun_op) - - for op in to_remove: - self._builder.get_operators().remove(op) - - return len(to_remove) != 0 diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py index 69b75b72cdd..3611c55e995 100755 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py +++ b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py @@ -11,9 +11,6 @@ from executorch.backends.nxp.backend.ir import logger from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.fuse_activation_functions import ( - FuseActivationFunctions, -) from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.move_relu_before_concat import ( MoveActivationBeforeConcatenation, ) @@ -27,8 +24,6 @@ class Optimization(Enum): - FUSE_ACTIVATION_FUNCTIONS = 1 - FUSE_TRANSPOSE_OPERATORS = 5 REMOVE_IDENTITY_TRANSPOSE_OPERATORS = 6 @@ -64,9 +59,6 @@ def __init__( self._builder = builder self.optimization_map = { - Optimization.FUSE_ACTIVATION_FUNCTIONS: FuseActivationFunctions( - builder, conversion_config - ), Optimization.FUSE_TRANSPOSE_OPERATORS: FuseTransposeOperators( builder, conversion_config ), From a3684520a42ea9aa4518f38a7b618f54a575204b Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Tue, 9 Sep 2025 17:57:27 +0200 Subject: [PATCH 2/3] Make Relu quantization non-shared --- backends/nxp/quantizer/patterns.py | 42 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 9588ce24c9e..47e487494c6 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -121,6 +121,24 @@ def get_anchors( ) +class SingleInputBasicPattern(QuantizationPattern): + @abstractmethod + def partition_types(self) -> list[OpOverload]: + pass + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + node = fused_partition[0].nodes[-1] + + return PartitionAnchors( + inputs=[(node, NodeArgsIdx(0))], + weights=[], + biases=[], + output=[(node,)], + ) + + def get_anchors_for_fixed_quant_specs( fused_partition: list[fx.GraphModule], scale: float, @@ -376,7 +394,7 @@ def partition_types(self): return [torch.ops.aten.flatten.using_ints] -class HardTanhPattern(QuantizationPattern): +class HardTanhPattern(SingleInputBasicPattern): """ Quantizer for HardTanh operator. Shared quantization spec is selected, as activation functions usually follows computation layer. 
@@ -385,23 +403,12 @@ class HardTanhPattern(QuantizationPattern): def partition_types(self): return [torch.ops.aten.hardtanh.default] - def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] - ) -> PartitionAnchors | None: - node = fused_partition[0].nodes[-1] - - return PartitionAnchors( - inputs=[(node, NodeArgsIdx(0))], - weights=[], - biases=[], - output=[(node,)], - ) def replacement_op(self): raise AssertionError() -class HardTanhInPlacePattern(QuantizationPattern): +class HardTanhInPlacePattern(SingleInputBasicPattern): """ Quantizer for HardTanh operator with param inplace=True. Shared quantization spec is selected, as activation functions usually follows computation layer. @@ -513,19 +520,18 @@ def partition_types(self): return [torch.ops.aten.permute.default] -class ReluPattern(SharedSpecPattern): +class ReluPattern(SingleInputBasicPattern): """ - Quantizer for Relu operator. Shared quantization spec is selected, as ReLU usually follows computation layer. + Quantizer for Relu operator. """ def partition_types(self): return [torch.ops.aten.relu.default] -class ReluInPlacePattern(SharedSpecPattern): +class ReluInPlacePattern(SingleInputBasicPattern): """ - Quantizer for Relu operator with param inplace=True. Shared quantization spec is selected, as ReLU usually - follows computation layer. + Quantizer for Relu operator with param inplace=True. """ def partition_types(self): From 6fdef26661f2ba592585124d46a56052b4125ac5 Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Fri, 12 Sep 2025 14:15:05 +0200 Subject: [PATCH 3/3] Quantize Addmm, Conv2d, Linear, Mm together with fusable activations + Move fused activations to separate QDQ cluster --- backends/nxp/backend/edge_helper.py | 25 +- backends/nxp/backend/neutron_target_spec.py | 79 ++++ ...operator_into_separate_qdq_cluster_pass.py | 19 + backends/nxp/neutron_partitioner.py | 4 + backends/nxp/quantizer/neutron_quantizer.py | 14 +- backends/nxp/quantizer/patterns.py | 131 ++++++- backends/nxp/tests/executorch_pipeline.py | 24 +- backends/nxp/tests/models.py | 70 ++++ backends/nxp/tests/test_edge_passes.py | 263 +++++++++++-- .../nxp/tests/test_per_channel_conversion.py | 14 +- backends/nxp/tests/test_quantizer.py | 371 +++++++++++++++--- backends/nxp/tests/test_removing_dead_code.py | 7 +- .../nxp/tests/test_split_group_convolution.py | 3 +- examples/nxp/aot_neutron_compile.py | 6 +- 14 files changed, 899 insertions(+), 131 deletions(-) diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py index 60b367c0f39..9b390790f3c 100644 --- a/backends/nxp/backend/edge_helper.py +++ b/backends/nxp/backend/edge_helper.py @@ -4,10 +4,29 @@ # LICENSE file in the root directory of this source tree. 
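+# A small sketch of the cluster shape the helpers below recognize. Both the
+# short names (e.g. `dequantize_per_tensor.default`) and the fully qualified
+# `quantized_decomposed.*` names are listed, since `node_.target.__name__`
+# differs between aten- and edge-dialect graphs:
+#
+#     dq = dequantize_per_tensor(x, scale, zp, qmin, qmax, dtype)
+#     y  = <float-domain op>(dq, ...)
+#     q  = quantize_per_tensor(y, scale, zp, qmin, qmax, dtype)
+#
+# `_is_dequantize(dq)` and `_is_quantize(q)` then mark the QDQ cluster boundary.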
import torch + from torch.fx import GraphModule, Node from torch.nn import Parameter +def _is_dequantize(node_: Node) -> bool: + return node_.op == "call_function" and node_.target.__name__ in [ + "dequantize_per_tensor.default", + "quantized_decomposed.dequantize_per_tensor.default", + "dequantize_per_channel.default", + "quantized_decomposed.dequantize_per_channel.default", + ] + + +def _is_quantize(node_: Node) -> bool: + return node_.op == "call_function" and node_.target.__name__ in [ + "quantize_per_tensor.default", + "quantized_decomposed.quantize_per_tensor.default", + "quantize_per_channel.default", + "quantized_decomposed.quantize_per_channel.default", + ] + + def input_tensor(node: Node, input_index: int) -> torch.Tensor: if len(node.all_input_nodes) <= input_index: raise IndexError @@ -62,12 +81,6 @@ def node_is_effectively_static_tensor( if node_is_static_tensor(node, parameters_mapping): return True - def _is_dequantize(node_: Node) -> bool: - return node_.target.__name__ in { - "quantized_decomposed.dequantize_per_tensor.default", - "quantized_decomposed.dequantize_per_channel.default", - } - return _is_dequantize(node) and node_is_static_tensor( node.args[0], parameters_mapping ) diff --git a/backends/nxp/backend/neutron_target_spec.py b/backends/nxp/backend/neutron_target_spec.py index 44399982e29..cf718991858 100644 --- a/backends/nxp/backend/neutron_target_spec.py +++ b/backends/nxp/backend/neutron_target_spec.py @@ -7,9 +7,14 @@ from enum import Enum +import torch + from executorch.backends.nxp.backend.neutron_converter_manager import ( NeutronConverterManager, ) +from executorch.exir.dialects._ops import ops as exir_ops + +from torch.fx import Node class NeutronHWVersion(Enum): @@ -17,6 +22,77 @@ class NeutronHWVersion(Enum): N3 = 2 +class NeutronTargetNeutronC: + @staticmethod + def is_supported_fused_activation(node_: Node) -> bool: + """Node operator is supported fused activation on Neutron for Linear and Conv2D.""" + return node_.op == "call_function" and ( + node_.target + in ( + torch.ops.aten.relu.default, # TODO Add torch.ops.aten.leaky_relu.default once it is supported + torch.ops.aten.relu_.default, + torch.ops.aten.sigmoid.default, + torch.ops.aten.sigmoid_.default, + torch.ops.aten.tanh.default, + torch.ops.aten.tanh_.default, + ) + or ( + ( + node_.target == torch.ops.aten.hardtanh.default + or node_.target == torch.ops.aten.hardtanh_.default + ) + and ( + node_.args[1:3] == (0.0, 6.0) # is converted to Relu6 + or node_.args[1:3] == (0.0, float("inf")) # is converted to Relu + ) + ) + ) + + @staticmethod + def is_supported_fused_activation__edge(node_: Node) -> bool: + """Node operator is supported fused activation on Neutron for Linear and Conv2D.""" + return node_.op == "call_function" and ( + node_.target + in ( + exir_ops.edge.aten.relu.default, # TODO Add torch.ops.aten.leaky_relu.default once it is supported + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, + ) + or ( + (node_.target == exir_ops.edge.aten.hardtanh.default) + and ( + node_.args[1:3] == (0.0, 6.0) # is converted to Relu6 + or node_.args[1:3] == (0.0, float("inf")) # is converted to Relu + ) + ) + ) + + @staticmethod + def is_fusable_conv_or_linear(node_: Node) -> bool: + """Node operator is supported fusable Linear or Conv2D on Neutron.""" + return node_.op == "call_function" and ( + node_.target == torch.ops.aten.conv2d.default + or node_.target == torch.ops.aten.addmm.default + or node_.target == torch.ops.aten.mm.default + or ( + node_.target == 
torch.ops.aten.linear.default + and len(node_.meta["val"].shape) == 2 + ) + ) + + @staticmethod + def is_fusable_conv_or_linear__edge(node_: Node) -> bool: + """Node operator in edge dialect is supported fusable Linear or Conv2D on Neutron.""" + return node_.op == "call_function" and ( + node_.target == exir_ops.edge.aten.addmm.default + or node_.target == exir_ops.edge.aten.mm.default + or ( + node_.target == exir_ops.edge.aten.convolution.default + and len(node_.meta["val"].shape) == 4 + ) + ) + + class NeutronTargetSpec: """ The functionality for probing the properties of Neutron Target. @@ -39,6 +115,9 @@ def __init__(self, target: str, neutron_converter_flavor: str): f"Target `{target}` contains unsupported HW version. Only N3/N3+ targets are supported at the moment." ) + # Now only Neutron-C is supported + self.neutron_target_info = NeutronTargetNeutronC() + # Target name. def get_name(self) -> str: return self.neutron_target.name diff --git a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py index d88684b86f0..f32e09e78e0 100644 --- a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py +++ b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py @@ -15,6 +15,11 @@ AddMM = exir_ops.edge.aten.addmm.default ViewCopy = exir_ops.edge.aten.view_copy.default MM = exir_ops.edge.aten.mm.default +Conv = exir_ops.edge.aten.convolution.default +HardTanh = exir_ops.edge.aten.hardtanh.default +Relu = exir_ops.edge.aten.relu.default +Sigmoid = exir_ops.edge.aten.sigmoid.default +Tanh = exir_ops.edge.aten.tanh.default def insert_qdq_pair_after_node( @@ -175,9 +180,23 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): main_cluster_node_to_auxiliary_nodes = { AddMM: [ ViewCopy, + HardTanh, + Relu, + Sigmoid, + Tanh, ], MM: [ ViewCopy, + HardTanh, + Relu, + Sigmoid, + Tanh, + ], + Conv: [ + HardTanh, + Relu, + Sigmoid, + Tanh, ], } diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index e7ad7ff7a0b..80237c5b37a 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -80,6 +80,10 @@ class QDQCluster: operator.getitem, exir_ops.edge.aten.view_copy.default, exir_ops.edge.aten.permute_copy.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, ] def __init__(self): diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 2681e221869..6564c19d7b9 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -5,10 +5,11 @@ # LICENSE file in the root directory of this source tree. 
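+# `NeutronQuantizer` now requires a `NeutronTargetSpec` so that the quantization
+# patterns can query target-specific activation-fusion support. A minimal usage
+# sketch (the target and flavor values mirror `executorch_pipeline.py`):
+#
+#     spec = NeutronTargetSpec("imxrt700", neutron_converter_flavor="SDK_25_09")
+#     quantizer = NeutronQuantizer(spec)
+#     prepared = prepare_pt2e(torch.export.export(model, example_input).module(), quantizer)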
import torch - from executorch.backends.nxp.aten_passes.neutron_aten_pass_manager import ( NeutronAtenPassManager, ) + +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.quantizer.patterns import ( AbsPattern, AdaptiveAvgPoolPattern, @@ -181,7 +182,8 @@ def get_supported_operators(cls) -> list[OperatorConfig]: class NeutronQuantizer(ComposableQuantizer): - def __init__(self): + def __init__(self, neutron_target_spec: NeutronTargetSpec): + self.neutron_target_spec = neutron_target_spec static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) static_fc_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_fc_qspec, None) super().__init__( @@ -189,19 +191,19 @@ def __init__(self): NeutronAtenQuantizer(AbsPattern(), static_qconfig), NeutronAtenQuantizer(AdaptiveAvgPoolPattern(), static_qconfig), NeutronAtenQuantizer(AddTensorPattern(), static_qconfig), - NeutronAtenQuantizer(AddmmPattern(), static_fc_qconfig), + NeutronAtenQuantizer(AddmmPattern(self), static_fc_qconfig), NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig), NeutronAtenQuantizer(CatPattern(), static_qconfig), NeutronAtenQuantizer(Conv1dPattern(), static_qconfig), - NeutronAtenQuantizer(Conv2dPattern(), static_qconfig), + NeutronAtenQuantizer(Conv2dPattern(self), static_qconfig), NeutronAtenQuantizer(DropoutPattern(), static_qconfig), NeutronAtenQuantizer(FlattenPattern(), static_qconfig), NeutronAtenQuantizer(HardTanhPattern(), static_qconfig), NeutronAtenQuantizer(HardTanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(LinearPattern(), static_fc_qconfig), + NeutronAtenQuantizer(LinearPattern(self), static_fc_qconfig), NeutronAtenQuantizer(MaxPoolPattern(), static_qconfig), NeutronAtenQuantizer(MeanDimPattern(), static_qconfig), - NeutronAtenQuantizer(MmPattern(), static_qconfig), + NeutronAtenQuantizer(MmPattern(self), static_qconfig), NeutronAtenQuantizer(PadPattern(), static_qconfig), NeutronAtenQuantizer(PermutePattern(), static_qconfig), NeutronAtenQuantizer(ReluPattern(), static_qconfig), diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 47e487494c6..f18d01f4bce 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -127,7 +127,7 @@ def partition_types(self) -> list[OpOverload]: pass def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors | None: node = fused_partition[0].nodes[-1] @@ -187,6 +187,12 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.addmm.default] @@ -208,11 +214,25 @@ def get_anchors( qscheme=torch.per_tensor_affine, ) + # If the following node is a fusable activation, quantize together with activation + output = [(addmm_node,)] + if len( + addmm_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(addmm_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(addmm_node, NodeArgsIdx(1))], 
weights=[(addmm_node, NodeArgsIdx(2))], biases=[(addmm_node, NodeArgsIdx(0), bias_qspec)], - output=[(addmm_node,)], + output=output, ) @@ -372,9 +392,69 @@ def partition_types(self) -> list[OpOverload]: class Conv2dPattern(ConvPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.conv2d.default] + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors: + conv_node = fused_partition[0].nodes[-1] + + bias_quantization_qspec = DerivedQuantizationSpec( + derived_from=[ + (conv_node.args[0], conv_node), + (conv_node.args[1], conv_node), + ], + derive_qparams_fn=get_bias_qparams, + dtype=torch.int32, + quant_min=-(2**31) + 1, + quant_max=2**31 - 1, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + + weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_quantization_spec = QuantizationSpec( + dtype=torch.int8, + observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, + quant_min=-127, + quant_max=127, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + + # Keep bias empty if not supplied + bias = [] + if len(conv_node.args) > 2 and conv_node.args[2] is not None: + bias = [(conv_node, NodeArgsIdx(2), bias_quantization_qspec)] + + # If the following node is a fusable activation, quantize together with activation + output = [(conv_node,)] + if len( + conv_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(conv_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + + return PartitionAnchors( + inputs=[(conv_node, NodeArgsIdx(0))], + weights=[(conv_node, NodeArgsIdx(1), weight_quantization_spec)], + biases=bias, + output=output, + ) + class DropoutPattern(SharedSpecPattern): """ @@ -403,7 +483,6 @@ class HardTanhPattern(SingleInputBasicPattern): def partition_types(self): return [torch.ops.aten.hardtanh.default] - def replacement_op(self): raise AssertionError() @@ -434,6 +513,12 @@ def replacement_op(self): class LinearPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.linear.default] @@ -459,11 +544,27 @@ def get_anchors( if len(linear_node.args) > 2: bias = [(linear_node, NodeArgsIdx(2), bias_qspec)] + # If the following node is a fusable activation, quantize together with activation + output = [(linear_node,)] + if ( + len(linear_node.users) == 1 + and len(linear_node.meta["val"].shape) <= 2 + and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(linear_node.users)) + ) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(linear_node, NodeArgsIdx(0))], weights=[(linear_node, NodeArgsIdx(1))], biases=bias, - output=[(linear_node,)], + output=output, ) @@ -486,6 +587,12 @@ def partition_types(self): class 
MmPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.mm.default] @@ -494,11 +601,25 @@ def get_anchors( ) -> PartitionAnchors: mm_node = fused_partition[0].nodes[-1] + # If the following node is a fusable activation, quantize together with activation + output = [(mm_node,)] + if len( + mm_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(mm_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(mm_node, NodeArgsIdx(0))], weights=[(mm_node, NodeArgsIdx(1))], biases=[], - output=[(mm_node,)], + output=output, ) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 09bceb2b0d3..703be1669a5 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. from dataclasses import dataclass +from functools import partial from typing import Callable import torch @@ -12,6 +13,7 @@ from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, ) +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( NeutronEdgePassManager, ) @@ -27,6 +29,12 @@ from executorch.extension.export_util.utils import export_to_edge from torch import nn from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e.quantizer import Quantizer + +default_neutron_converter_flavor = "SDK_25_09" +neutron_target_spec = NeutronTargetSpec( + target="imxrt700", neutron_converter_flavor=default_neutron_converter_flavor +) @dataclass @@ -55,6 +63,10 @@ def get_random_calibration_inputs( ] +def _get_default_quantizer(target_spec: NeutronTargetSpec) -> Quantizer: + return NeutronQuantizer(target_spec) + + def to_model_input_spec( input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]] ) -> tuple[ModelInputSpec, ...]: @@ -85,13 +97,17 @@ def to_quantized_edge_program( [tuple[ModelInputSpec, ...]], list[tuple[torch.Tensor, ...]] ] = get_random_calibration_inputs, target="imxrt700", - neutron_converter_flavor="SDK_25_09", + neutron_converter_flavor=default_neutron_converter_flavor, remove_quant_io_ops=False, custom_delegation_options=CustomDelegationOptions(), # noqa B008 - get_quantizer_fn=lambda: NeutronQuantizer(), + get_quantizer_fn=None, ) -> EdgeProgramManager: - calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) + _neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor) + if get_quantizer_fn is None: + get_quantizer_fn = partial(_get_default_quantizer, _neutron_target_spec) + quantizer = get_quantizer_fn() + calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) example_input = calibration_inputs[0] # Make sure the model is in the evaluation mode. 
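+# Note: the default quantizer is now resolved lazily. With `get_quantizer_fn`
+# left as None, the pipeline builds it from the target passed to this very
+# call, roughly `partial(_get_default_quantizer, _neutron_target_spec)`,
+# instead of the old eager `lambda: NeutronQuantizer()` default, which could
+# not know the requested target/flavor.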
@@ -101,7 +117,7 @@ def to_quantized_edge_program(
 
     exir_program_aten__module_quant = _quantize_model(
         exir_program_aten.module(),
-        get_quantizer_fn(),
+        quantizer,
         calibration_inputs,
     )
 
diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py
index f613349fed0..c4d9491d4a7 100644
--- a/backends/nxp/tests/models.py
+++ b/backends/nxp/tests/models.py
@@ -9,6 +9,8 @@
 
 import torch
+
+from torch import nn
+
 
 class Conv1dModule(torch.nn.Module):
     def __init__(
@@ -501,3 +503,71 @@ def __init__(self, dim, keepdim):
     def forward(self, x):
         x = self.conv(x)
         return torch.mean(x, dim=self.dim, keepdim=self.keepdim)
+
+
+def get_activation(activation, inplace):
+    match activation:
+        case "relu":
+            return nn.ReLU(inplace=inplace)
+        case "relu_hardtanh":
+            return nn.Hardtanh(inplace=inplace, min_val=0.0, max_val=float("inf"))
+        case "relu6":
+            return nn.ReLU6(inplace=inplace)
+        case "tanh":
+            if inplace:
+                return torch.tanh_
+            else:
+                return torch.tanh
+        case "sigmoid":
+            return nn.Sigmoid()
+        case _:
+            raise ValueError(f"Unsupported activation: {activation}")
+
+
+class LinearActivationModule(torch.nn.Module):
+    def __init__(
+        self, activation: str, inplace: bool, in_channels: int, mode: str = "linear"
+    ):
+        super().__init__()
+        self.mode = mode.lower()
+        assert self.mode in [
+            "linear",
+            "addmm",
+            "mm",
+        ], "Mode must be 'linear', 'addmm', or 'mm'"
+
+        if self.mode == "linear":
+            self.linear = torch.nn.Linear(in_channels, in_channels)
+        else:
+            # Manual weight and bias for addmm/mm
+            self.weight = torch.nn.Parameter(torch.empty(in_channels, in_channels))
+            self.bias = torch.nn.Parameter(torch.empty(in_channels))
+            torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+            fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight)
+            bound = 1 / math.sqrt(fan_in)
+            torch.nn.init.uniform_(self.bias, -bound, bound)
+
+        self.activation = get_activation(activation, inplace)
+        self.eval()
+
+    def forward(self, x):
+        if self.mode == "linear":
+            x = self.linear(x)
+        elif self.mode == "addmm":
+            x = torch.addmm(self.bias, x, self.weight)
+        elif self.mode == "mm":
+            x = torch.mm(x, self.weight)
+        return self.activation(x)
+
+
+class ConvActivationModule(torch.nn.Module):
+    def __init__(self, activation: str, inplace: bool, in_channels: int):
+        super().__init__()
+
+        self.conv = Conv2dModule(in_channels=in_channels)
+        self.activation = get_activation(activation, inplace)
+        self.eval()
+
+    def forward(self, x):
+        x = self.conv(x)
+        return self.activation(x)
diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py
index a189299be52..ff1c215fc55 100644
--- a/backends/nxp/tests/test_edge_passes.py
+++ b/backends/nxp/tests/test_edge_passes.py
@@ -1,14 +1,37 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
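+#
+# These tests check that, after the Neutron edge passes, a fusable op and its
+# trailing activation end up in separate QDQ clusters, i.e. the edge graph has
+# the shape (sketch):
+#
+#     dq -> addmm/mm/convolution -> q -> dq -> relu/tanh/sigmoid/hardtanh -> q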
+ +import unittest + +import kgb import numpy as np +import torch + +from executorch.backends.nxp.backend.edge_helper import _is_dequantize, _is_quantize +from executorch.backends.nxp.backend.edge_program_converter import ( + EdgeProgramToIRConverter, +) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters import ( ViewCopyConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + neutron_target_spec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( EdgeProgramExecutor, OverrideTargetSupportCheck, ) -from executorch.backends.nxp.tests.models import ConvFCFCSoftmaxModuleWithoutReshape +from executorch.backends.nxp.tests.models import ( + ConvActivationModule, + ConvFCFCSoftmaxModuleWithoutReshape, + LinearActivationModule, +) from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized +from torch.export import ExportedProgram from torch.fx import Graph, Node @@ -19,21 +42,6 @@ def _is_view_copy(node_: Node) -> bool: ) -def _is_dequantize(node_: Node) -> bool: - return ( - node_.op == "call_function" - and node_.target.__name__ - == "quantized_decomposed.dequantize_per_tensor.default" - ) - - -def _is_quantize(node_: Node) -> bool: - return ( - node_.op == "call_function" - and node_.target.__name__ == "quantized_decomposed.quantize_per_tensor.default" - ) - - def _find_view_copy_node_indices(graph_nodes: list[Node]) -> list[int]: view_copy_nodes_indices = [] @@ -57,32 +65,211 @@ def _assert_nodes_form_a_view_copy_qdq_cluster(graph: Graph, node_indices: list[ assert quantize.args[0] == view_copy -def test_moving_view_copy_into_separate_qdq_clusters(): - model = ConvFCFCSoftmaxModuleWithoutReshape() - input_shape = (1, 4, 3, 33) +class TestEdgePasses(unittest.TestCase): + @classmethod + def setUpClass(cls): + torch.manual_seed(23) + np.random.seed(42) + + def test_moving_view_copy_into_separate_qdq_clusters(self): + model = ConvFCFCSoftmaxModuleWithoutReshape() + input_shape = (1, 4, 3, 33) + + # Prohibit `view_copy` conversion for the testing purposes. + def unsupported_target(*_): + return False + + with OverrideTargetSupportCheck( + ViewCopyConverter, new_target_support_check=unsupported_target + ): + epm = to_quantized_edge_program(model, input_shape, target="imxrt700") + exported_program = epm.exported_program() + + nodes = list(exported_program.graph_module.graph.nodes) + assert len(nodes) == 28 - # Prohibit `view_copy` conversion for the testing purposes. - def unsupported_target(*_): - return False + view_copy_indices = _find_view_copy_node_indices(nodes) - with OverrideTargetSupportCheck( - ViewCopyConverter, new_target_support_check=unsupported_target + assert len(view_copy_indices) == 4 + for idx in view_copy_indices: + _assert_nodes_form_a_view_copy_qdq_cluster( + exported_program.graph, node_indices=[idx - 1, idx, idx + 1] + ) + + # Make sure the program is runnable. 
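+        # (EdgeProgramExecutor interprets the edge program directly, so a
+        # structurally broken graph would already fail at this inference.)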
+ input_data = np.random.random(input_shape).astype("float32") + program_executor = EdgeProgramExecutor(exported_program) + program_executor.inference(input_data) + + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__addmm( + self, activation ): - epm = to_quantized_edge_program(model, input_shape, target="imxrt700") - exported_program = epm.exported_program() + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: - nodes = list(exported_program.graph_module.graph.nodes) - assert len(nodes) == 28 + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="addmm", + ) - view_copy_indices = _find_view_copy_node_indices(nodes) + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] - assert len(view_copy_indices) == 4 - for idx in view_copy_indices: - _assert_nodes_form_a_view_copy_qdq_cluster( - exported_program.graph, node_indices=[idx - 1, idx, idx + 1] + # Check linear and activation are in separate QDQ clusters + nodes = list(exported_program.graph.nodes) + assert len(nodes) == 12 + assert _is_dequantize(nodes[5]) + assert ( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge( + nodes[6] + ) + ) + assert _is_quantize(nodes[7]) + assert _is_dequantize(nodes[8]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge( + nodes[9] ) + assert _is_quantize(nodes[10]) - # Make sure the program is runnable. - input_data = np.random.random(input_shape).astype("float32") - program_executor = EdgeProgramExecutor(exported_program) - program_executor.inference(input_data) + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__mm( + self, activation + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="mm", + ) + + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + + # Check linear and activation are in separate QDQ clusters + nodes = list(exported_program.graph.nodes) + assert len(nodes) == 10 + assert _is_dequantize(nodes[3]) + assert ( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge( + nodes[4] + ) + ) + assert _is_quantize(nodes[5]) + assert _is_dequantize(nodes[6]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge( + nodes[7] + ) + assert _is_quantize(nodes[8]) + + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__linear( + self, activation + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="linear", + ) + + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + + # Check 
linear and activation are in separate QDQ clusters
+            nodes = list(exported_program.graph.nodes)
+            assert len(nodes) == 13
+            assert _is_dequantize(nodes[5])
+            assert (
+                neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge(
+                    nodes[7]
+                )
+            )
+            assert _is_quantize(nodes[8])
+            assert _is_dequantize(nodes[9])
+            assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge(
+                nodes[10]
+            )
+            assert _is_quantize(nodes[11])
+
+    @parameterized.expand(
+        [
+            ["relu"],
+            ["relu6"],
+            ["tanh"],
+            ["sigmoid"],
+        ]
+    )
+    def test_moving_fusable_activations_into_separate_qdq_clusters__conv(
+        self, activation
+    ):
+        with kgb.spy_on(
+            EdgeProgramToIRConverter.convert_program,
+            call_original=True,
+            owner=EdgeProgramToIRConverter,
+        ) as converter_spy:
+
+            input_shape = (1, 4, 8, 8)
+            model = ConvActivationModule(
+                activation=activation, inplace=True, in_channels=input_shape[1]
+            )
+
+            _ = to_quantized_edge_program(model, input_shape)
+            exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
+
+            # Check conv and activation are in separate QDQ clusters
+            nodes = list(exported_program.graph.nodes)
+            assert len(nodes) == 16
+            assert _is_dequantize(nodes[9])
+            assert (
+                neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge(
+                    nodes[10]
+                )
+            )
+            assert _is_quantize(nodes[11])
+            assert _is_dequantize(nodes[12])
+            assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge(
+                nodes[13]
+            )
+            assert _is_quantize(nodes[14])
diff --git a/backends/nxp/tests/test_per_channel_conversion.py b/backends/nxp/tests/test_per_channel_conversion.py
index 043ba8fc001..7dcabf46be0 100644
--- a/backends/nxp/tests/test_per_channel_conversion.py
+++ b/backends/nxp/tests/test_per_channel_conversion.py
@@ -30,7 +30,7 @@
     ToChannelLastPreprocess,
 )
 from executorch.backends.nxp.tests.models import Conv2dModule
-from executorch.backends.nxp.tests.test_quantizer import _get_target_name
+from executorch.exir.dialects._ops import ops as exir_ops
 from torch import fx
 from torch._ops import OpOverload
 
@@ -144,10 +144,12 @@ def test_per_channel_convolution(self):
 
         nodes = list(exported_program.graph.nodes)
 
-        assert _get_target_name(nodes[8]).endswith(
-            "quantized_decomposed.dequantize_per_channel.default"
+        assert (
+            nodes[8].target
+            == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
         )
-        assert _get_target_name(nodes[9]).endswith(
-            "quantized_decomposed.dequantize_per_channel.default"
+        assert (
+            nodes[9].target
+            == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
         )
-        assert nodes[10].name == "aten_convolution_default"
+        assert nodes[10].target == exir_ops.edge.aten.convolution.default
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
index 624e350ed21..0cc6fbfbc2f 100644
--- a/backends/nxp/tests/test_quantizer.py
+++ b/backends/nxp/tests/test_quantizer.py
@@ -7,14 +7,41 @@
 
 from copy import deepcopy
 
+import executorch.backends.nxp.tests.executorch_pipeline as executorch_pipeline
 import executorch.backends.nxp.tests.models as models
+import numpy as np
+import pytest
 import torch
+
+from executorch.backends.nxp.backend.edge_program_converter import (
+    EdgeProgramToIRConverter,
+)
+
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    neutron_target_spec,
+    to_quantized_edge_program,
+)
+from executorch.backends.nxp.tests.executors import (
+    convert_run_compare,
+    
graph_contains_any_of_ops, + ToChannelFirstPreprocess, + ToChannelLastPreprocess, +) +from executorch.exir.dialects._ops import ops as exir_ops +from torch.export import ExportedProgram +from torch.fx import GraphModule from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e - -def _get_target_name(node): - return node._pretty_print_target(node.target) +fuse_activation_ops = [ + exir_ops.edge.aten.addmm.default, + exir_ops.edge.aten.mm.default, + exir_ops.edge.aten.convolution.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, +] def test_quantizer_conv2d(): @@ -22,7 +49,7 @@ def test_quantizer_conv2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -38,22 +65,21 @@ def test_quantizer_conv2d(): assert nodes[11].name == "conv2d" # [0]: Input, [1] : weights, [2]: bias assert ( - _get_target_name(nodes[11].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[11].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[11].args[1]) - == "torch.ops.quantized_decomposed.dequantize_per_channel.default" + nodes[11].args[1].target + == torch.ops.quantized_decomposed.dequantize_per_channel.default ) assert ( - _get_target_name(nodes[11].args[2]) - == "torch.ops.quantized_decomposed.dequantize_per_channel.default" + nodes[11].args[2].target + == torch.ops.quantized_decomposed.dequantize_per_channel.default ) assert ( - _get_target_name(nodes[12]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[12].target == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - assert nodes[12].args[0].name == "conv2d" + assert nodes[12].args[0].target == torch.ops.aten.conv2d.default def test_quantizer_linear(): @@ -61,7 +87,7 @@ def test_quantizer_linear(): model.eval() example_input = (torch.ones(10, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -77,22 +103,19 @@ def test_quantizer_linear(): assert nodes[7].name == "linear" # [0]: Input, [1] : weights, [2]: bias assert ( - _get_target_name(nodes[7].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" - ) - assert ( - _get_target_name(nodes[7].args[1]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[7].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[7].args[2]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[7].args[1].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[8]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[7].args[2].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) - assert nodes[8].args[0].name == "linear" + assert nodes[8].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + assert nodes[8].args[0].target == torch.ops.aten.linear.default def test_quantizer_maxpool2d(): @@ -100,7 +123,7 @@ def test_quantizer_maxpool2d(): model.eval() example_input = (torch.ones(1, 8, 32, 
32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -114,16 +137,15 @@ def test_quantizer_maxpool2d(): nodes = list(m.graph.nodes) assert len(nodes) == 18 # Check if QDQ pattern: - assert nodes[14].name == "max_pool2d" + assert nodes[14].target == torch.ops.aten.max_pool2d.default assert ( - _get_target_name(nodes[14].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[14].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[15]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[15].target == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - assert nodes[15].args[0].name == "max_pool2d" + assert nodes[15].args[0].target == torch.ops.aten.max_pool2d.default # Check if input and output quantization is same input_quant = nodes[14].args[0].args[1:] @@ -136,7 +158,7 @@ def test_quantizer_softmax(): model.eval() example_input = (torch.ones(1, 10),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -150,16 +172,13 @@ def test_quantizer_softmax(): nodes = list(m.graph.nodes) assert len(nodes) == 7 # Check if QDQ pattern: - assert nodes[3].name == "softmax" - assert ( - _get_target_name(nodes[3].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" - ) + assert nodes[3].target == torch.ops.aten.softmax.int assert ( - _get_target_name(nodes[4]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[3].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) - assert nodes[4].args[0].name == "softmax" + assert nodes[4].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + assert nodes[4].args[0].target == torch.ops.aten.softmax.int # Check output quantization scale, zp, _, _, dtype = nodes[4].args[1:] @@ -173,7 +192,7 @@ def test_quantizer_single_maxpool2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -186,7 +205,7 @@ def test_quantizer_single_maxpool2d(): nodes = list(m.graph.nodes) assert len(nodes) == 7 - assert nodes[3].name == "max_pool2d" + assert nodes[3].target == torch.ops.aten.max_pool2d.default assert "quantization_annotation" not in nodes[1].meta @@ -195,7 +214,7 @@ def test_quantizer_conv2d_relu(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -207,10 +226,14 @@ def test_quantizer_conv2d_relu(): m(*example_input) nodes = list(m.graph.nodes) - assert len(nodes) == 14 - assert nodes[9].name == "dequantize_per_tensor_default_1" - assert nodes[10].name == "relu" - assert nodes[11].name == "quantize_per_tensor_default_2" + + assert len(nodes) == 12 + assert ( + nodes[6].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[7].target == torch.ops.aten.conv2d.default + assert nodes[8].target == torch.ops.aten.relu.default + assert 
nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default def test_quantizer_conv2d_avg_pool2d(): @@ -218,7 +241,7 @@ def test_quantizer_conv2d_avg_pool2d(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -230,10 +253,15 @@ def test_quantizer_conv2d_avg_pool2d(): m(*example_input) nodes = list(m.graph.nodes) + assert len(nodes) == 18 - assert nodes[13].name == "dequantize_per_tensor_default_1" - assert nodes[14].name == "avg_pool2d" - assert nodes[15].name == "quantize_per_tensor_default_2" + assert ( + nodes[13].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[14].target == torch.ops.aten.avg_pool2d.default + assert ( + nodes[15].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) def test_quantizer_conv2d_permute(): @@ -241,7 +269,7 @@ def test_quantizer_conv2d_permute(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -255,9 +283,13 @@ def test_quantizer_conv2d_permute(): nodes = list(m.graph.nodes) assert len(nodes) == 14 - assert nodes[9].name == "dequantize_per_tensor_default_1" - assert nodes[10].name == "permute" - assert nodes[11].name == "quantize_per_tensor_default_2" + assert ( + nodes[9].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[10].target == torch.ops.aten.permute.default + assert ( + nodes[11].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) def test_multiple_shared_spec_ops_in_row(): @@ -269,7 +301,7 @@ def test_multiple_shared_spec_ops_in_row(): model.eval() example_input = (torch.ones(1, 3, 64, 64),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -282,10 +314,14 @@ def test_multiple_shared_spec_ops_in_row(): nodes = list(m.graph.nodes) - assert len(nodes) == 17 - assert nodes[-5].name.startswith("dequantize_per_tensor_default") - assert nodes[-4].name == "max_pool2d" - assert nodes[-3].name.startswith("quantize_per_tensor_default") + assert len(nodes) == 15 + assert ( + nodes[-5].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[-4].target == torch.ops.aten.max_pool2d.default + assert ( + nodes[-3].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) # Assert that post-ReLU quantize and pre-MaxPool dequantize has same specs assert nodes[-6].args[1:] == nodes[-5].args[1:] @@ -302,7 +338,7 @@ def test_quantizers_order_invariance(): model.eval() example_input = (torch.ones(1, 4, 64, 64),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() @@ -324,3 +360,214 @@ def test_quantizers_order_invariance(): assert len(nodes) == len(nodes_reversed) assert all(n == n_reversed for n, n_reversed in zip(nodes, nodes_reversed)) + + +@pytest.mark.parametrize( + "activation, inplace", + [ + ("relu", True), + ("relu", False), + ("relu6", True), + ("relu6", False), + ("tanh", True), + ("tanh", False), + ("sigmoid", False), 
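+        # `get_activation` in models.py returns `nn.Sigmoid()` unconditionally
+        # (no in-place variant is wired up), so sigmoid only runs with inplace=False.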
+ ], +) +def test_quantizer__linear_w_activation(mocker, activation, inplace): + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model") + + input_shape = (1, 4) + model = models.LinearActivationModule( + activation=activation, + inplace=inplace, + in_channels=input_shape[1], + mode="linear", + ) + + edge_program = to_quantized_edge_program(model, input_shape).exported_program() + + # Make sure that all nodes were delegated. + assert not graph_contains_any_of_ops( + graph=edge_program.graph, + ops=fuse_activation_ops, + ) + assert any("lowered_module" in node.name for node in edge_program.graph.nodes) + + tflite_flatbuffers_model, io_formats = converter_spy.spy_return + exported_program: ExportedProgram = converter_spy.call_args.args[1] + exir_program_aten_quant: GraphModule = quantizer_spy.spy_return + + # Check linear and activation are in the same QDQ cluster + nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 12 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[7]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation( + nodes[8] + ) + assert nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + atol=1.0, + ) + + +@pytest.mark.parametrize( + "activation, inplace", + [ + ("relu", True), + ("relu", False), + ("relu6", True), + ("relu6", False), + ("tanh", True), + ("tanh", False), + ("sigmoid", False), + ], +) +def test_quantizer__addmm_w_activation(mocker, activation, inplace): + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model") + + input_shape = (1, 4) + model = models.LinearActivationModule( + activation=activation, inplace=inplace, in_channels=input_shape[1], mode="addmm" + ) + + edge_program = to_quantized_edge_program(model, input_shape).exported_program() + + # Make sure that all nodes were delegated. 
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check linear and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 12
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[7])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[8]
+    )
+    assert nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        atol=1.0,
+    )
+
+
+@pytest.mark.parametrize(
+    "activation, inplace",
+    [
+        ("relu", True),
+        ("relu", False),
+        ("relu6", True),
+        ("relu6", False),
+        ("tanh", True),
+        ("tanh", False),
+        ("sigmoid", False),
+    ],
+)
+def test_quantizer__mm_w_activation(mocker, activation, inplace):
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+    quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model")
+
+    input_shape = (1, 4)
+    model = models.LinearActivationModule(
+        activation=activation, inplace=inplace, in_channels=input_shape[1], mode="mm"
+    )
+
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    # Make sure that all nodes were delegated.
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check linear and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 10
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[5])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[6]
+    )
+    assert nodes[7].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        atol=1.0,
+    )
+
+
+@pytest.mark.parametrize(
+    "activation, inplace",
+    [
+        ("relu", True),
+        ("relu", False),
+        ("relu6", True),
+        ("relu6", False),
+        ("tanh", True),
+        ("tanh", False),
+        ("sigmoid", False),
+    ],
+)
+def test_quantizer__conv_w_activation(mocker, activation, inplace):
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+    quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model")
+
+    input_shape = (1, 4, 8, 8)
+    model = models.ConvActivationModule(
+        activation=activation, inplace=inplace, in_channels=input_shape[1]
+    )
+
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    # Make sure that all nodes were delegated.
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check conv and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 16
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[11])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[12]
+    )
+    assert (
+        nodes[13].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    )
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        tflite_input_preprocess=ToChannelLastPreprocess(),
+        tflite_output_preprocess=ToChannelFirstPreprocess(),
+        atol=1.0,
+    )
diff --git a/backends/nxp/tests/test_removing_dead_code.py b/backends/nxp/tests/test_removing_dead_code.py
index cc51746c81c..00cb6775b3c 100644
--- a/backends/nxp/tests/test_removing_dead_code.py
+++ b/backends/nxp/tests/test_removing_dead_code.py
@@ -10,7 +10,10 @@
 import torch

 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
-from executorch.backends.nxp.tests.executorch_pipeline import _quantize_model
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    _quantize_model,
+    neutron_target_spec,
+)
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops


@@ -51,7 +54,7 @@ def test_removing_dead_code(self):
         )

         # The `NeutronQuantizer` should remove the dead code in the `transform_for_annotation()` method.
-        quantizer = NeutronQuantizer()
+        quantizer = NeutronQuantizer(neutron_target_spec)
         exir_program_aten_quant = _quantize_model(
             exir_program_aten.module(), quantizer, [example_inputs]
         )
diff --git a/backends/nxp/tests/test_split_group_convolution.py b/backends/nxp/tests/test_split_group_convolution.py
index 4c9f277e34d..52133b6c7e2 100644
--- a/backends/nxp/tests/test_split_group_convolution.py
+++ b/backends/nxp/tests/test_split_group_convolution.py
@@ -21,6 +21,7 @@
 from executorch.backends.nxp.tests.executorch_pipeline import (
     _quantize_model,
     get_random_calibration_inputs,
+    neutron_target_spec,
     to_model_input_spec,
 )
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops
@@ -40,7 +41,7 @@ def _quantize_and_lower_module(
     module: GraphModule, input_shape: tuple[int, ...], target="imxrt700"
 ) -> EdgeProgramManager:
     calibration_inputs = get_random_calibration_inputs(to_model_input_spec(input_shape))

-    quantizer = NeutronQuantizer()
+    quantizer = NeutronQuantizer(neutron_target_spec)

     exir_program_aten__module_quant = _quantize_model(
         module, quantizer, calibration_inputs
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py
index cb23f99a54d..4c90b3aefad 100644
--- a/examples/nxp/aot_neutron_compile.py
+++ b/examples/nxp/aot_neutron_compile.py
@@ -15,6 +15,7 @@
 import executorch.kernels.quantized  # noqa F401
 import torch

+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import (
     NeutronEdgePassManager,
 )
@@ -114,7 +115,10 @@ def post_training_quantize(
     # Based on executorch.examples.arm.aot_arm_compiler.quantize
     logging.info("Quantizing model")
     logging.debug(f"---> Original model: {model}")
-    quantizer = NeutronQuantizer()
+    neutron_target_spec = NeutronTargetSpec(
+        target="imxrt700", neutron_converter_flavor="wrapper"
+    )
+    quantizer = NeutronQuantizer(neutron_target_spec)
     m = prepare_pt2e(model, quantizer)

     # Calibration:
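
For context, a minimal sketch of the quantizer construction pattern this patch introduces, assuming the same `NeutronTargetSpec` arguments used in `examples/nxp/aot_neutron_compile.py` above. `MyModel` and `example_inputs` are hypothetical placeholders, and the prepare/calibrate/convert steps follow the standard PT2E quantization flow rather than anything specific to this change.

# Usage sketch (not part of the patch): NeutronQuantizer now requires a
# NeutronTargetSpec instead of being constructed with no arguments.
import torch
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Target spec arguments taken from the aot_neutron_compile.py hunk above.
neutron_target_spec = NeutronTargetSpec(
    target="imxrt700", neutron_converter_flavor="wrapper"
)
quantizer = NeutronQuantizer(neutron_target_spec)

# `MyModel` and `example_inputs` are placeholders for illustration only.
model = torch.export.export(MyModel().eval(), example_inputs, strict=True).module()
m = prepare_pt2e(model, quantizer)  # insert observers
m(*example_inputs)                  # calibrate on representative inputs
m = convert_pt2e(m)                 # produce the quantized module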