NXP Backend: Add pass to remove IO de/quantize nodes

skywall · robert-kalmar · commit dc8e7ea70411 · 2025-08-04T14:08:46.000+02:00
diff --git a/backends/nxp/backend/ir/edge_passes/__init__.py b/backends/nxp/backend/ir/edge_passes/__init__.py
diff --git a/backends/nxp/backend/ir/edge_passes/remove_io_quant_ops_pass.py b/backends/nxp/backend/ir/edge_passes/remove_io_quant_ops_pass.py
@@ -0,0 +1,114 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.exir import EdgeProgramManager
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+class RemoveIOQuantOpsPass(ExportPass):
+    """Strip the quantize/dequantize nodes sitting at the model's inputs and outputs.
+
+    The pass only decides *which* inputs and outputs qualify; the graph rewrite itself
+    is delegated to `QuantizeInputs` and `QuantizeOutputs` from
+    `executorch.exir.passes.quantize_io_pass`.
+    """
+
+    def __init__(self, edge_program_manager: EdgeProgramManager):
+        """Store the manager whose exported program is inspected and rewritten.
+
+        Args:
+            edge_program_manager: Manager whose `exported_program()` is analysed; it
+                is also handed to `QuantizeInputs` and `QuantizeOutputs`.
+        """
+        super().__init__()
+        self._edge_program_manager = edge_program_manager
+
+    def _get_quantizable_input_indices(self) -> list[int]:
+        """Return indices of user inputs whose single user is `quantize_per_tensor`.
+
+        Raises:
+            ValueError: If a user input does not have exactly one user.
+        """
+        exported_program = self._edge_program_manager.exported_program()
+        graph = exported_program.graph_module.graph
+
+        # Index the placeholders once instead of rescanning the graph per input.
+        # `setdefault` keeps the first node seen for a name.
+        placeholders = {}
+        for node in graph.nodes:
+            if node.op == "placeholder":
+                placeholders.setdefault(node.name, node)
+
+        quant_op = exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+        inputs_to_quantization = []
+
+        user_inputs = exported_program.graph_signature.user_inputs
+        for input_index, user_input in enumerate(user_inputs):
+            target_placeholder = placeholders.get(user_input)
+            assert (
+                target_placeholder is not None
+            ), f"No placeholder found for user input {user_input!r}"
+
+            user_count = len(target_placeholder.users)
+            if user_count != 1:
+                # Report the real count: an unused input (0 users) lands here too.
+                raise ValueError(
+                    f"Input {input_index} has {user_count} users, expected exactly 1"
+                )
+
+            (only_user,) = target_placeholder.users
+            if only_user.target == quant_op:
+                inputs_to_quantization.append(input_index)
+
+        return inputs_to_quantization
+
+    def _get_quantizable_output_indices(self) -> list[int]:
+        """Return indices of graph outputs produced by `dequantize_per_tensor`.
+
+        Raises:
+            NotImplementedError: If the graph does not have exactly one output node.
+        """
+        exported_program = self._edge_program_manager.exported_program()
+        graph = exported_program.graph_module.graph
+
+        outputs = [n for n in graph.nodes if n.op == "output"]
+        if len(outputs) != 1:
+            raise NotImplementedError("Only 1 output node is supported.")
+
+        dequant_op = exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+        # Entries that are not graph nodes (e.g. `None`) have no `target`; skip them
+        # instead of failing with an AttributeError.
+        return [
+            output_index
+            for output_index, user_output in enumerate(outputs[0].args[0])
+            if isinstance(user_output, torch.fx.Node)
+            and user_output.target == dequant_op
+        ]
+
+    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
+        """Run `QuantizeInputs` and `QuantizeOutputs` on the qualifying I/O.
+
+        NOTE(review): the indices are computed from the stored manager's program,
+        not from `graph_module`; presumably both refer to the same graph when the
+        pass runs through `EdgeProgramManager.transform` - confirm.
+
+        Args:
+            graph_module: Graph module forwarded to both delegated passes.
+
+        Returns:
+            A `PassResult` wrapping `graph_module`, always flagged as modified.
+        """
+        input_indices = self._get_quantizable_input_indices()
+        output_indices = self._get_quantizable_output_indices()
+
+        QuantizeInputs(self._edge_program_manager, input_indices).call(graph_module)
+        QuantizeOutputs(self._edge_program_manager, output_indices).call(graph_module)
+
+        return PassResult(graph_module, True)
diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py
@@ -6,6 +6,9 @@
 import torch
 
 from executorch import exir
+from executorch.backends.nxp.backend.ir.edge_passes.remove_io_quant_ops_pass import (
+    RemoveIOQuantOpsPass,
+)
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
 from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
@@ -37,6 +40,7 @@ def to_quantized_edge_program(
     operators_not_to_delegate: list[str] = None,
     target="imxrt700",
     neutron_converter_flavor="SDK_25_03",
+    remove_quant_io_ops=False,
 ) -> EdgeProgramManager:
     if isinstance(input_shapes, list):
         assert all(isinstance(input_shape, tuple) for input_shape in input_shapes), (
@@ -77,6 +81,11 @@ def to_quantized_edge_program(
         compile_config=EdgeCompileConfig(_check_ir_validity=False),
     )
 
+    if remove_quant_io_ops:
+        edge_program_manager = edge_program_manager.transform(
+            [RemoveIOQuantOpsPass(edge_program_manager=edge_program_manager)]
+        )
+
     return edge_program_manager
 
 
diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
@@ -0,0 +1,102 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import itertools
+
+import executorch.kernels.quantized  # noqa F401
+import torch
+from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.models import Conv2dReLUModule
+from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet
+from executorch.exir import ExecutorchBackendConfig
+from executorch.exir.passes.quantize_io_pass import get_config_method_name
+
+
+def _lower_without_io_quant_ops(model, input_shape):
+    """Quantize `model`, remove the I/O quant ops and lower it to ExecuTorch."""
+    edge_program_manager = to_quantized_edge_program(
+        model, input_shape, remove_quant_io_ops=True
+    )
+    return edge_program_manager.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=False)
+    )
+
+
+def _assert_io_config_methods(exec_prog, indices):
+    """Assert `scale` and `zp` config methods exist for each listed I/O index."""
+    variants = itertools.product(["input", "output"], indices, ["scale", "zp"])
+    for arg_type, index, key in variants:
+        method = get_config_method_name(None, arg_type, index, key)
+        assert method in exec_prog._config_methods, f"Missing method '{method}'."
+
+
+def test_remove_io_quant_ops_pass__conv_relu():
+    """Conv2d + ReLU: the single input and output are INT8 around the delegate."""
+    model = Conv2dReLUModule()
+    model.eval()
+
+    exec_prog = _lower_without_io_quant_ops(model, (1, 4, 32, 32))
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert nodes[2].name == "executorch_call_delegate"
+    assert (
+        nodes[4].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    _assert_io_config_methods(exec_prog, [0])
+
+
+def test_remove_io_quant_ops_pass__cifarnet():
+    """CifarNet: the lowered graph has 17 nodes with INT8 input and output."""
+    model = CifarNet().get_eager_model()
+
+    exec_prog = _lower_without_io_quant_ops(model, (1, 3, 32, 32))
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert len(nodes) == 17
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert (
+        nodes[16].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    _assert_io_config_methods(exec_prog, [0])
+
+
+class MultiInputOutputModule(torch.nn.Module):
+    """Two-input, two-output model: returns `conv(relu(x)) + y` and `relu(x)`."""
+
+    def __init__(self):
+        super().__init__()
+
+        self.conv = torch.nn.Conv2d(4, 64, 2, bias=False)
+        self.relu = torch.nn.ReLU()
+
+    def forward(self, x, y):
+        z = self.relu(x)
+        x = self.conv(z)
+        return x + y, z
+
+
+def test_multiple_inputs__multiple_outputs():
+    """Both inputs and both outputs get their `scale` and `zp` config methods."""
+    model = MultiInputOutputModule()
+    model.eval()
+
+    exec_prog = _lower_without_io_quant_ops(model, [(1, 4, 32, 32), (1, 1, 1, 31)])
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert nodes[3].name == "executorch_call_delegate"
+    assert (
+        nodes[-1].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    _assert_io_config_methods(exec_prog, [0, 1])
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py
@@ -16,6 +16,9 @@
 
 import torch
 
+from executorch.backends.nxp.backend.ir.edge_passes.remove_io_quant_ops_pass import (
+    RemoveIOQuantOpsPass,
+)
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
 from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
@@ -191,6 +194,15 @@ def _get_batch_size(data):
         default=False,
         help="Test the selected model and print the accuracy between 0 and 1.",
     )
+    parser.add_argument(
+        "-r",
+        "--remove-quant-io-ops",
+        action="store_true",
+        required=False,
+        default=False,
+        help="Remove I/O De/Quantize nodes. Model will start to accept quantized "
+        "inputs and produce quantized outputs.",
+    )
     parser.add_argument(
         "--operators_not_to_delegate",
         required=False,
@@ -266,6 +278,14 @@ def _get_batch_size(data):
     )
     logging.debug(f"Exported graph:\n{edge_program.exported_program().graph}")
 
+    if args.remove_quant_io_ops:
+        edge_program = edge_program.transform(
+            [RemoveIOQuantOpsPass(edge_program_manager=edge_program)]
+        )
+        logging.debug(
+            f"Exported graph (RemoveIOQuantOpsPass):\n{edge_program.exported_program().graph}"
+        )
+
     # 6. Export to ExecuTorch program
     try:
         exec_prog = edge_program.to_executorch(