diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/constant_pad_nd_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/constant_pad_nd_converter.py
index 761840c379f..7b749818f5e 100644
--- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/constant_pad_nd_converter.py
+++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/constant_pad_nd_converter.py
@@ -9,7 +9,6 @@
 import numpy as np
 
 from executorch.backends.nxp.backend.edge_helper import input_rank
-from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
     apply_permutation_to,
     create_channels_first_to_channels_last_permutation,
@@ -24,6 +23,7 @@
 )
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
+    pad_options,
     pad_v2_options,
 )
 from torch.fx import Node
@@ -50,6 +50,10 @@ def _is_supported_in_IR(
         if not NodeConverter._has_shared_q_params_if_quantized(node):
             return False
 
+        if len(paddings) > 4 and paddings[4:6] != [0, 0]:
+            # Padding the channels dimension is currently not supported.
+            return False
+
         return True
 
     # noinspection PyMethodMayBeStatic
@@ -101,6 +105,15 @@ def convert(self, node: Node):
             np.asarray(paddings, "int32"), "paddings"
         )
 
+        if constant == 0.0:
+            # We are padding with zeros, so the plain `Pad` op can be used.
+            t_op.tmp_inputs = [x, paddings_tensor]
+            t_op.tmp_outputs = [y]
+            t_op.builtin_options = pad_options.Pad()
+
+            self.builder.append_operators([t_op])
+            return
+
         if x.quantization is None:
             constant_tensor = self.builder.create_tensor_for_data(
                 np.array([constant], tf_lite_type_to_numpy(x.type)), "constant"
@@ -124,6 +137,4 @@
         t_op.tmp_outputs = [y]
         t_op.builtin_options = pad_v2_options.PadV2()
 
-        ops_to_add = OpsList(middle_op=t_op)
-
-        self.builder.append_operators(ops_to_add.flatten())
+        self.builder.append_operators([t_op])
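The zero-constant fast path above relies on the fact that TFLite's plain `Pad` always fills with zeros, while `PadV2` carries the fill value as an explicit constant tensor input. A minimal numpy sketch of that semantic difference; the array and pad widths are illustrative, not part of the converter:

    import numpy as np

    x = np.arange(6, dtype=np.float32).reshape(2, 3)
    pad_width = [(1, 1), (0, 2)]  # (before, after) per dimension

    # `Pad` semantics: implicit zero fill.
    padded_zero = np.pad(x, pad_width, mode="constant", constant_values=0.0)
    # `PadV2` semantics: explicit fill value, here -1.0.
    padded_const = np.pad(x, pad_width, mode="constant", constant_values=-1.0)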
diff --git a/backends/nxp/backend/ir/edge_passes/__init__.py b/backends/nxp/backend/ir/edge_passes/__init__.py
new file mode 100755
index 00000000000..e69de29bb2d
diff --git a/backends/nxp/backend/ir/edge_passes/remove_io_quant_ops_pass.py b/backends/nxp/backend/ir/edge_passes/remove_io_quant_ops_pass.py
new file mode 100644
index 00000000000..d49b646d489
--- /dev/null
+++ b/backends/nxp/backend/ir/edge_passes/remove_io_quant_ops_pass.py
@@ -0,0 +1,79 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+
+from executorch.exir import EdgeProgramManager
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass
+from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
+from torch.fx.passes.infra.pass_base import PassResult
+
+
+class RemoveIOQuantOpsPass(ExportPass):
+
+    def __init__(self, edge_program_manager: EdgeProgramManager):
+        super().__init__()
+        self._edge_program_manager = edge_program_manager
+
+    def _get_quantizable_input_indices(self):
+        exported_program = self._edge_program_manager.exported_program()
+
+        graph = exported_program.graph_module.graph
+        user_inputs = exported_program.graph_signature.user_inputs
+
+        inputs_to_quantization = []
+
+        for input_index, user_input in enumerate(user_inputs):
+            placeholders = [
+                n for n in graph.nodes if n.op == "placeholder" and n.name == user_input
+            ]
+            assert placeholders
+            target_placeholder = placeholders[0]
+
+            if len(target_placeholder.users) != 1:
+                raise ValueError(f"Input {input_index} has more than one user")
+
+            quantize = next(iter(target_placeholder.users))
+            if (
+                quantize.target
+                != exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
+            ):
+                continue
+
+            inputs_to_quantization.append(input_index)
+
+        return inputs_to_quantization
+
+    def _get_quantizable_output_indices(self):
+        exported_program = self._edge_program_manager.exported_program()
+
+        graph = exported_program.graph_module.graph
+        outputs = [n for n in graph.nodes if n.op == "output"]
+        if len(outputs) != 1:
+            raise NotImplementedError("Only 1 output node is supported.")
+
+        outputs_to_quantization = []
+
+        user_outputs = list(outputs[0].args[0])
+        for output_index, user_output in enumerate(user_outputs):
+            if (
+                user_output.target
+                != exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
+            ):
+                continue
+
+            outputs_to_quantization.append(output_index)
+
+        return outputs_to_quantization
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        input_indices = self._get_quantizable_input_indices()
+        output_indices = self._get_quantizable_output_indices()
+
+        QuantizeInputs(self._edge_program_manager, input_indices).call(graph_module)
+        QuantizeOutputs(self._edge_program_manager, output_indices).call(graph_module)
+
+        return PassResult(graph_module, True)
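A minimal usage sketch for the pass, mirroring how it is applied in `executorch_pipeline.py` and `aot_neutron_compile.py` further below; the helper name `strip_io_quant_ops` is illustrative, not part of the patch:

    from executorch.backends.nxp.backend.ir.edge_passes.remove_io_quant_ops_pass import (
        RemoveIOQuantOpsPass,
    )
    from executorch.exir import EdgeProgramManager


    def strip_io_quant_ops(epm: EdgeProgramManager) -> EdgeProgramManager:
        # The pass reads the manager's exported program to locate the I/O
        # [de]quantize nodes, so it is handed the same manager it transforms.
        return epm.transform([RemoveIOQuantOpsPass(edge_program_manager=epm)])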
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
index b2fe2c9bbac..5f0857dc75e 100644
--- a/backends/nxp/quantizer/neutron_quantizer.py
+++ b/backends/nxp/quantizer/neutron_quantizer.py
@@ -41,6 +41,7 @@
     no_outside_users,
 )
 from torch import fx
+from torch.ao.quantization.quantizer.utils import _annotate_output_qspec
 from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver
 from torchao.quantization.pt2e.quantizer import (
     ComposableQuantizer,
@@ -237,6 +238,8 @@ def transform_for_annotation(
         return pass_runner(model).graph_module
 
     def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        self._annotate_inputs(model)
+
         nodes = list(model.graph.nodes)
         for node in nodes:
             if (
@@ -252,5 +255,25 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
 
         return model
 
+    def _is_input_annotated(self, node: fx.Node) -> bool:
+        return (
+            "quantization_annotation" in node.meta
+            and node.meta["quantization_annotation"]._annotated
+        )
+
+    def _mark_input_node_as_annotated(self, node: fx.Node) -> None:
+        if "quantization_annotation" not in node.meta:
+            node.meta["quantization_annotation"] = QuantizationAnnotation()
+        node.meta["quantization_annotation"]._annotated = True
+
+    def _annotate_inputs(self, model: fx.GraphModule):
+        for node in model.graph.nodes:
+            if self._is_input_annotated(node):
+                continue
+
+            if node.op == "placeholder" and len(node.users) > 0:
+                _annotate_output_qspec(node, act_qspec)
+                self._mark_input_node_as_annotated(node)
+
     def validate(self, model: torch.fx.GraphModule) -> None:
         return super().validate(model)
diff --git a/backends/nxp/run_unittests.sh b/backends/nxp/run_unittests.sh
index dde10065743..f0a91e2a65d 100755
--- a/backends/nxp/run_unittests.sh
+++ b/backends/nxp/run_unittests.sh
@@ -11,4 +11,4 @@
 EXECUTORCH_DIR=$(dirname $(dirname $SCRIPT_DIR))
 cd $EXECUTORCH_DIR
 
 # '-c /dev/null' is used to ignore root level pytest.ini.
-PYTHONPATH=`cd ..; pwd` pytest -c /dev/null backends/nxp/tests/
+pytest -c /dev/null backends/nxp/tests/
diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py
index f942e60e08a..36ef76f8a2c 100644
--- a/backends/nxp/tests/executorch_pipeline.py
+++ b/backends/nxp/tests/executorch_pipeline.py
@@ -6,6 +6,9 @@
 import torch
 from executorch import exir
+from executorch.backends.nxp.backend.ir.edge_passes.remove_io_quant_ops_pass import (
+    RemoveIOQuantOpsPass,
+)
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
 from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
@@ -37,6 +40,7 @@ def to_quantized_edge_program(
     operators_not_to_delegate: list[str] = None,
     target="imxrt700",
     neutron_converter_flavor="SDK_25_03",
+    remove_quant_io_ops=False,
 ) -> EdgeProgramManager:
     if isinstance(input_shapes, list):
         assert all(isinstance(input_shape, tuple) for input_shape in input_shapes), (
@@ -77,6 +81,11 @@ def to_quantized_edge_program(
         compile_config=EdgeCompileConfig(_check_ir_validity=False),
     )
 
+    if remove_quant_io_ops:
+        edge_program_manager = edge_program_manager.transform(
+            [RemoveIOQuantOpsPass(edge_program_manager=edge_program_manager)]
+        )
+
     return edge_program_manager
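With the new flag, the whole quantize-partition-strip flow collapses into one call. A sketch based on the tests further below; the model and input shape are examples:

    from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
    from executorch.backends.nxp.tests.models import Conv2dReLUModule

    edge_program_manager = to_quantized_edge_program(
        Conv2dReLUModule().eval(),  # example model
        (1, 4, 32, 32),             # example input shape
        remove_quant_io_ops=True,   # apply RemoveIOQuantOpsPass after lowering
    )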
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
index c4097c3023c..47cd54c4efb 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
@@ -63,16 +63,10 @@ def test_constant_pad_nd_conversion__default_constant():
         pytest.param((2, 4), tuple(range(4)), id="2D, padding N, H"),
         pytest.param((2, 4, 6), tuple(range(2)), id="3D, padding H"),
         pytest.param((2, 4, 6), tuple(range(4)), id="3D, padding C, H"),
-        pytest.param((2, 4, 6), list(range(6)), id="3D, padding N, C, H"),
         pytest.param((2, 4, 6, 8), tuple(range(2)), id="4D, padding W"),
         pytest.param((2, 4, 6, 8), tuple(range(4)), id="4D, padding H, W"),
-        pytest.param((2, 4, 6, 8), list(range(6)), id="4D, padding C, H, W"),
-        pytest.param((2, 4, 6, 8), list(range(8)), id="4D, padding N, C, H, W"),
-        pytest.param((1, 2, 3, 4, 5), list(range(2)), id="5D, padding D"),
+        pytest.param((1, 2, 3, 4, 5), tuple(range(2)), id="5D, padding D"),
         pytest.param((1, 2, 3, 4, 5), tuple(range(4)), id="5D, padding W, D"),
-        pytest.param((1, 2, 3, 4, 5), list(range(6)), id="5D, padding H, W, D"),
-        pytest.param((1, 2, 3, 4, 5), tuple(range(8)), id="5D, padding C, H, W, D"),
-        pytest.param((1, 2, 3, 4, 5), list(range(10)), id="5D, padding N, C, H, W, D"),
     ],
 )
 def test_constant_pad_nd_conversion__format_less(input_shape, paddings):
@@ -93,8 +87,9 @@ def test_constant_pad_nd_conversion__format_less(input_shape, paddings):
     ],
 )
 def test_constant_pad_nd_conversion__channels_first(input_shape, paddings):
+    model = ConstantPadNDConvModule(paddings)
     edge_program = to_edge_program(
-        ConstantPadNDConvModule(paddings), input_shape
+        model, input_shape
     ).exported_program()  # Extra `Conv` after the padding.
     input_data = np.random.random(input_shape).astype(np.float32)
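For reference, `constant_pad_nd` lists padding pairs from the last dimension backwards, so for a 4D NCHW input, entries 4:6 address the channels dimension, which is exactly what the converter's new `paddings[4:6]` check rejects when nonzero. A small sketch of the convention, with an illustrative shape:

    import torch

    x = torch.zeros(1, 2, 3, 4)  # N, C, H, W
    # Pairs run backwards: (W_before, W_after, H_before, H_after, C_before, C_after).
    y = torch.nn.functional.pad(x, [0, 0, 0, 0, 1, 1])  # pads only the channels dim
    assert y.shape == (1, 4, 3, 4)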
diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
new file mode 100644
index 00000000000..d7920aa55d8
--- /dev/null
+++ b/backends/nxp/tests/ir/edge_passes/test_remove_io_quant_ops_pass.py
@@ -0,0 +1,121 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+import itertools
+
+import executorch.kernels.quantized  # noqa F401
+import torch
+from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
+from executorch.backends.nxp.tests.models import Conv2dReLUModule
+from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet
+from executorch.exir import ExecutorchBackendConfig
+from executorch.exir.passes.quantize_io_pass import get_config_method_name
+
+
+def test_remove_io_quant_ops_pass__conv_relu():
+    model = Conv2dReLUModule()
+    model.eval()
+
+    input_shape = (1, 4, 32, 32)
+    edge_program_manager = to_quantized_edge_program(
+        model, input_shape, remove_quant_io_ops=True
+    )
+
+    exec_prog = edge_program_manager.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=False)
+    )
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert nodes[2].name == "executorch_call_delegate"
+    assert (
+        nodes[4].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    assert (
+        get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
+    )
+    assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
+    assert (
+        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
+    )
+    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods
+
+
+def test_remove_io_quant_ops_pass__cifarnet():
+    model = CifarNet().get_eager_model()
+    input_shape = (1, 3, 32, 32)
+    edge_program_manager = to_quantized_edge_program(
+        model, input_shape, remove_quant_io_ops=True
+    )
+
+    exec_prog = edge_program_manager.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=False)
+    )
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert len(nodes) == 17
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert (
+        nodes[16].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    assert (
+        get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
+    )
+    assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
+    assert (
+        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
+    )
+    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods
+
+
+class MultiInputOutputModule(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+        self.conv = torch.nn.Conv2d(4, 64, 2, bias=False)
+        self.relu = torch.nn.ReLU()
+
+    def forward(self, x, y):
+        z = self.relu(x)
+        x = self.conv(z)
+        return x + y, z
+
+
+def test_multiple_inputs__multiple_outputs():
+    model = MultiInputOutputModule()
+    model.eval()
+
+    input_shape = [(1, 4, 32, 32), (1, 1, 1, 31)]
+    edge_program_manager = to_quantized_edge_program(
+        model, input_shape, remove_quant_io_ops=True
+    )
+
+    exec_prog = edge_program_manager.to_executorch(
+        config=ExecutorchBackendConfig(extract_delegate_segments=False)
+    )
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert (
+        nodes[0].meta["val"].dtype == torch.int8
+    ), "Input tensor doesn't have type INT8."
+    assert nodes[3].name == "executorch_call_delegate"
+    assert (
+        nodes[-1].meta["val"][0].dtype == torch.int8
+    ), "Output tensor doesn't have type INT8."
+
+    quant_method_variants = itertools.product(
+        ["input", "output"], [0, 1], ["scale", "zp"]
+    )
+
+    expected_methods = [
+        get_config_method_name(None, arg_type, index, key)
+        for arg_type, index, key in quant_method_variants
+    ]
+    assert all(method in exec_prog._config_methods for method in expected_methods)
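Once the I/O [de]quantize nodes are removed, callers must quantize inputs (and dequantize outputs) themselves, using the scale and zero point stored behind the config methods the tests above check. A hedged sketch of the input side; `quantize_input` is an illustrative helper, not part of the pass:

    import torch


    def quantize_input(x: torch.Tensor, scale: float, zp: int) -> torch.Tensor:
        # Affine int8 quantization using the input's recorded scale/zero point.
        return torch.clamp(torch.round(x / scale) + zp, -128, 127).to(torch.int8)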
diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py
new file mode 100644
index 00000000000..6c143df79b3
--- /dev/null
+++ b/backends/nxp/tests/test_integration.py
@@ -0,0 +1,50 @@
+# Copyright 2024 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import executorch.extension.pybindings.portable_lib
+import executorch.kernels.quantized  # noqa F401
+
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    to_quantized_executorch_program,
+)
+from executorch.backends.nxp.tests.models import ConvFCSoftmaxModule
+from executorch.devtools.backend_debug import get_delegation_info
+from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet
+
+
+def test_conv_fc_softmax__to_executorch_program():
+    model = ConvFCSoftmaxModule()
+    input_shape = (1, 4, 5, 5)
+
+    exec_prog = to_quantized_executorch_program(model, input_shape)
+
+    program = exec_prog.exported_program()
+    assert (
+        program.graph_module.lowered_module_0
+    ), "There is no lowered module with Neutron microcode."
+
+    delegation_info = get_delegation_info(program.graph_module)
+    assert delegation_info.num_delegated_subgraphs == 1
+    assert delegation_info.num_non_delegated_nodes == 11
+    assert delegation_info.num_delegated_nodes == 13
+
+    for node in program.graph.nodes:
+        # Make sure Convolution and AddMM are delegated.
+        assert "convolution" not in node.name
+        assert "addmm" not in node.name
+
+
+def test_cifarnet():
+    model = CifarNet().get_eager_model().eval()
+    input_shape = (1, 3, 32, 32)
+    exec_prog = to_quantized_executorch_program(model, input_shape)
+
+    delegation_info = get_delegation_info(exec_prog.exported_program().graph_module)
+    assert delegation_info.num_delegated_subgraphs == 1
+    assert delegation_info.num_non_delegated_nodes == 17
+    assert delegation_info.num_delegated_nodes == 42
+
+    nodes = list(exec_prog.exported_program().graph.nodes)
+    assert nodes[2].name == "quantized_decomposed_quantize_per_tensor_default"
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
index 37156ca5d51..e97889e09a2 100644
--- a/backends/nxp/tests/test_quantizer.py
+++ b/backends/nxp/tests/test_quantizer.py
@@ -195,8 +195,8 @@ def test_quantizer_single_maxpool2d():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 3
-    assert nodes[1].name == "max_pool2d"
+    assert len(nodes) == 7
+    assert nodes[3].name == "max_pool2d"
     assert "quantization_annotation" not in nodes[1].meta
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py
index d8e4d324de2..5c0634697d0 100644
--- a/examples/nxp/aot_neutron_compile.py
+++ b/examples/nxp/aot_neutron_compile.py
@@ -16,6 +16,9 @@
 
 import torch
 
+from executorch.backends.nxp.backend.ir.edge_passes.remove_io_quant_ops_pass import (
+    RemoveIOQuantOpsPass,
+)
 from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner
 from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
@@ -191,6 +194,15 @@ def _get_batch_size(data):
         default=False,
         help="Test the selected model and print the accuracy between 0 and 1.",
     )
+    parser.add_argument(
+        "-r",
+        "--remove-quant-io-ops",
+        action="store_true",
+        required=False,
+        default=False,
+        help="Remove the I/O de/quantize nodes. The model will then accept quantized "
+        "inputs and produce quantized outputs.",
+    )
     parser.add_argument(
         "--operators_not_to_delegate",
         required=False,
@@ -266,6 +278,14 @@ def _get_batch_size(data):
     )
     logging.debug(f"Exported graph:\n{edge_program.exported_program().graph}")
 
+    if args.remove_quant_io_ops:
+        edge_program = edge_program.transform(
+            [RemoveIOQuantOpsPass(edge_program_manager=edge_program)]
+        )
+        logging.debug(
+            f"Exported graph (RemoveIOQuantOpsPass):\n{edge_program.exported_program().graph}"
+        )
+
     # 6. Export to ExecuTorch program
     try:
         exec_prog = edge_program.to_executorch(
diff --git a/src/executorch/examples/nxp/experimental b/src/executorch/examples/nxp/experimental
new file mode 120000
index 00000000000..e8cb6c8aedb
--- /dev/null
+++ b/src/executorch/examples/nxp/experimental
@@ -0,0 +1 @@
+../../../../examples/nxp/experimental/
\ No newline at end of file