# NXP backend: Add model input and output quantization (#12586)
Changes from all commits: `320d280`, `6d369cc`, `5499f4e`, `dc8e7ea`.
New file — the `RemoveIOQuantOpsPass` module (`@@ -0,0 +1,79 @@`):

```python
# Copyright 2025 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch

from executorch.exir import EdgeProgramManager
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass
from executorch.exir.passes.quantize_io_pass import QuantizeInputs, QuantizeOutputs
from torch.fx.passes.infra.pass_base import PassResult


class RemoveIOQuantOpsPass(ExportPass):
    """Remove the `quantize_per_tensor` ops on model inputs and the
    `dequantize_per_tensor` ops on model outputs, so the model consumes and
    produces quantized tensors directly."""

    def __init__(self, edge_program_manager: EdgeProgramManager):
        super().__init__()
        self._edge_program_manager = edge_program_manager

    def _get_quantizable_input_indices(self):
        """Return the indices of user inputs whose sole consumer is a
        `quantize_per_tensor` node."""
        exported_program = self._edge_program_manager.exported_program()

        graph = exported_program.graph_module.graph
        user_inputs = exported_program.graph_signature.user_inputs

        inputs_to_quantization = []

        for input_index, user_input in enumerate(user_inputs):
            placeholders = [
                n for n in graph.nodes if n.op == "placeholder" and n.name == user_input
            ]
            assert placeholders
            target_placeholder = placeholders[0]

            if len(target_placeholder.users) != 1:
                raise ValueError(f"Input {input_index} has more than one user.")

            quantize = next(iter(target_placeholder.users))
            if (
                quantize.target
                != exir_ops.edge.quantized_decomposed.quantize_per_tensor.default
            ):
                continue

            inputs_to_quantization.append(input_index)

        return inputs_to_quantization

    def _get_quantizable_output_indices(self):
        """Return the indices of user outputs that are produced by a
        `dequantize_per_tensor` node."""
        exported_program = self._edge_program_manager.exported_program()

        graph = exported_program.graph_module.graph
        outputs = [n for n in graph.nodes if n.op == "output"]
        if len(outputs) != 1:
            raise NotImplementedError("Only 1 output node is supported.")

        outputs_to_quantization = []

        user_outputs = list(outputs[0].args[0])
        for output_index, user_output in enumerate(user_outputs):
            if (
                user_output.target
                != exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default
            ):
                continue

            outputs_to_quantization.append(output_index)

        return outputs_to_quantization

    def call(self, graph_module: torch.fx.GraphModule):
        input_indices = self._get_quantizable_input_indices()
        output_indices = self._get_quantizable_output_indices()

        # Delegate the actual graph rewrite (and the recording of the I/O
        # scale/zero-point config methods) to the existing upstream passes.
        QuantizeInputs(self._edge_program_manager, input_indices).call(graph_module)
        QuantizeOutputs(self._edge_program_manager, output_indices).call(graph_module)

        return PassResult(graph_module, True)
```
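For orientation, here is a minimal usage sketch of running the pass through the generic `to_edge(...).transform([...])` flow. In this PR the pass is actually driven by the `remove_quant_io_ops` flag of the NXP test pipeline (see the tests below), so the exact wiring here is illustrative, not the PR's integration point:

```python
# Illustrative wiring only — the NXP pipeline integrates the pass via its
# own `remove_quant_io_ops` option rather than this exact sequence.
# `quantized_model` and `example_input` are placeholder names.
import torch
from executorch.exir import to_edge

exported = torch.export.export(quantized_model, (example_input,))
edge_program_manager = to_edge(exported)

# Running the pass strips the boundary quantize/dequantize nodes and records
# the input/output scale and zero-point as config methods on the program.
edge_program_manager = edge_program_manager.transform(
    [RemoveIOQuantOpsPass(edge_program_manager)]
)
```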
Changes to the `constant_pad_nd` conversion tests (`@@ -63,16 +63,10 @@ def test_constant_pad_nd_conversion__default_constant():`):

```diff
         pytest.param((2, 4), tuple(range(4)), id="2D, padding N, H"),
         pytest.param((2, 4, 6), tuple(range(2)), id="3D, padding H"),
         pytest.param((2, 4, 6), tuple(range(4)), id="3D, padding C, H"),
-        pytest.param((2, 4, 6), list(range(6)), id="3D, padding N, C, H"),
```
> **Contributor:** Curious why remove these tests?
>
> **Collaborator:** These tests are no longer relevant, because `ConstantPad` nodes with these parameters will not be delegated. It is related to this restriction: https://github.com/pytorch/executorch/pull/12586/files#diff-e01d426046aa644b4e18ffa510b42e50e1b18b8f5407bcfb0d210f701d95b16aR53. We are still able to convert them into the intermediate model representation, but the Neutron conversion will fail.
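As an illustrative aside (not code from this PR), the referenced restriction could take roughly this shape: a delegation guard that refuses `constant_pad_nd` nodes whose padding reaches beyond the innermost spatial dimensions. The helper name and the exact rule below are assumptions:

```python
# Hypothetical sketch of the kind of delegation guard referenced above.
# `paddings` follows the `constant_pad_nd` convention: (before, after) pairs,
# starting from the last (innermost) dimension.
def padding_is_delegatable(paddings: list[int]) -> bool:
    # Assumption: only padding of the two innermost dimensions (e.g. H and W
    # of a 4D NCHW tensor) can be delegated to Neutron.
    return len(paddings) <= 4
```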
The hunk continues below the thread:

```diff
         pytest.param((2, 4, 6, 8), tuple(range(2)), id="4D, padding W"),
         pytest.param((2, 4, 6, 8), tuple(range(4)), id="4D, padding H, W"),
-        pytest.param((2, 4, 6, 8), list(range(6)), id="4D, padding C, H, W"),
-        pytest.param((2, 4, 6, 8), list(range(8)), id="4D, padding N, C, H, W"),
-        pytest.param((1, 2, 3, 4, 5), list(range(2)), id="5D, padding D"),
+        pytest.param((1, 2, 3, 4, 5), tuple(range(2)), id="5D, padding D"),
         pytest.param((1, 2, 3, 4, 5), tuple(range(4)), id="5D, padding W, D"),
-        pytest.param((1, 2, 3, 4, 5), list(range(6)), id="5D, padding H, W, D"),
-        pytest.param((1, 2, 3, 4, 5), tuple(range(8)), id="5D, padding C, H, W, D"),
-        pytest.param((1, 2, 3, 4, 5), list(range(10)), id="5D, padding N, C, H, W, D"),
     ],
 )
 def test_constant_pad_nd_conversion__format_less(input_shape, paddings):
```
@@ -93,8 +87,9 @@ def test_constant_pad_nd_conversion__format_less(input_shape, paddings): | |
| ], | ||
| ) | ||
| def test_constant_pad_nd_conversion__channels_first(input_shape, paddings): | ||
| model = ConstantPadNDConvModule(paddings) | ||
| edge_program = to_edge_program( | ||
| ConstantPadNDConvModule(paddings), input_shape | ||
| model, input_shape | ||
| ).exported_program() # Extra `Conv` after the padding. | ||
|
|
||
| input_data = np.random.random(input_shape).astype(np.float32) | ||
|
|
||
New file — tests for `RemoveIOQuantOpsPass` (`@@ -0,0 +1,122 @@`):

```python
# Copyright 2025 NXP
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import itertools

import executorch.kernels.quantized  # noqa F401
import torch
from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
from executorch.backends.nxp.tests.models import Conv2dReLUModule
from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet
from executorch.exir import ExecutorchBackendConfig
from executorch.exir.passes.quantize_io_pass import get_config_method_name


def test_remove_io_quant_ops_pass__conv_relu():
    model = Conv2dReLUModule()
```
> **Contributor:** Since you are calculating indices, do you want to test a model which has more than one input and output?
>
> **Collaborator:** A test for a multi-input/output model has been added.
The file continues:

```python
    model.eval()

    input_shape = (1, 4, 32, 32)
    edge_program_manager = to_quantized_edge_program(
        model, input_shape, remove_quant_io_ops=True
    )

    exec_prog = edge_program_manager.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=False)
    )

    nodes = list(exec_prog.exported_program().graph.nodes)
    assert (
        nodes[0].meta["val"].dtype == torch.int8
    ), "Input tensor doesn't have type INT8."
    assert nodes[2].name == "executorch_call_delegate"
    assert (
        nodes[4].meta["val"][0].dtype == torch.int8
    ), "Output tensor doesn't have type INT8."

    assert (
        get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
    )
    assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
    assert (
        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
    )
    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods


def test_remove_io_quant_ops_pass__cifarnet():
    model = CifarNet().get_eager_model()
    input_shape = (1, 3, 32, 32)
    edge_program_manager = to_quantized_edge_program(
        model, input_shape, remove_quant_io_ops=True
    )

    exec_prog = edge_program_manager.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=False)
    )

    nodes = list(exec_prog.exported_program().graph.nodes)
    assert len(nodes) == 17
    assert (
        nodes[0].meta["val"].dtype == torch.int8
    ), "Input tensor doesn't have type INT8."
    assert (
        nodes[16].meta["val"][0].dtype == torch.int8
    ), "Output tensor doesn't have type INT8."

    assert (
        get_config_method_name(None, "input", 0, "scale") in exec_prog._config_methods
    )
    assert get_config_method_name(None, "input", 0, "zp") in exec_prog._config_methods
    assert (
        get_config_method_name(None, "output", 0, "scale") in exec_prog._config_methods
    )
    assert get_config_method_name(None, "output", 0, "zp") in exec_prog._config_methods


class MultiInputOutputModule(torch.nn.Module):
    def __init__(self):
        super().__init__()

        self.conv = torch.nn.Conv2d(4, 64, 2, bias=False)
        self.relu = torch.nn.ReLU()

    def forward(self, x, y):
        z = self.relu(x)
        x = self.conv(z)
        return x + y, z


def test_multiple_inputs__multiple_outputs():
    model = MultiInputOutputModule()
    model.eval()

    input_shape = [(1, 4, 32, 32), (1, 1, 1, 31)]
    edge_program_manager = to_quantized_edge_program(
        model, input_shape, remove_quant_io_ops=True
    )

    exec_prog = edge_program_manager.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=False)
    )

    nodes = list(exec_prog.exported_program().graph.nodes)
    assert (
        nodes[0].meta["val"].dtype == torch.int8
    ), "Input tensor doesn't have type INT8."
    assert nodes[3].name == "executorch_call_delegate"
    assert (
        nodes[-1].meta["val"][0].dtype == torch.int8
    ), "Output tensor doesn't have type INT8."

    quant_method_variants = itertools.product(
        ["input", "output"], [0, 1], ["scale", "zp"]
    )

    expected_methods = [
        get_config_method_name(None, arg_type, index, key)
        for arg_type, index, key in quant_method_variants
    ]
    assert all(method in exec_prog._config_methods for method in expected_methods)
```
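Once the pass has folded the I/O quantize/dequantize ops away, callers must handle the conversion themselves using the exported `scale`/`zp` config methods that these tests assert on. A minimal sketch of that host-side arithmetic, assuming standard int8 affine quantization (how the config methods are queried at runtime is backend-specific and not shown):

```python
import torch


def quantize_input(x: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
    # Affine quantization: q = clamp(round(x / scale) + zp, -128, 127).
    q = torch.round(x / scale) + zero_point
    return q.clamp(-128, 127).to(torch.int8)


def dequantize_output(q: torch.Tensor, scale: float, zero_point: int) -> torch.Tensor:
    # Inverse mapping back to float: x = (q - zp) * scale.
    return (q.to(torch.float32) - zero_point) * scale
```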
> Feel free to improve `quantize_io_pass` and move these utils there if you think they can be useful elsewhere.
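If the index-finding helpers were later hoisted into `quantize_io_pass` as suggested, the surface might look something like the following (hypothetical signatures, not part of this PR):

```python
# Hypothetical module-level helpers in executorch.exir.passes.quantize_io_pass.
from executorch.exir import EdgeProgramManager


def get_quantizable_input_indices(epm: EdgeProgramManager) -> list[int]:
    """Indices of user inputs fed directly into a quantize_per_tensor node."""
    ...


def get_quantizable_output_indices(epm: EdgeProgramManager) -> list[int]:
    """Indices of user outputs produced by a dequantize_per_tensor node."""
    ...
```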