diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizations/replace_average_pool_before_fully_connected_with_sum.py b/backends/nxp/backend/ir/tflite_optimizer/optimizations/replace_average_pool_before_fully_connected_with_sum.py
deleted file mode 100755
index 0b3926dd8a5..00000000000
--- a/backends/nxp/backend/ir/tflite_optimizer/optimizations/replace_average_pool_before_fully_connected_with_sum.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# Copyright 2024 NXP
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-import numpy as np
-from executorch.backends.nxp.backend.ir import logger
-
-from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import (
-    BuiltinOperator,
-)
-from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType
-from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
-from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.sum_options import (
-    Sum,
-)
-from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.base_optimization import (
-    BaseOptimization,
-)
-from executorch.backends.nxp.backend.ir.tflite_optimizer.pattern_matcher import (
-    Op,
-    PatternMatcher,
-)
-from executorch.backends.nxp.backend.ir.tflite_optimizer.tensor_rules import (
-    RuleOr,
-    TensorDimensionsMatch,
-    TensorHasData,
-    TensorHasRank,
-    TensorIsChannelsLast,
-    TensorIsFormatless,
-    TensorsAreQuantized,
-    TensorsHaveOneConsumer,
-    TensorsHaveType,
-)
-
-
-class ReplaceAveragePoolBeforeFullyConnectedWithSum(BaseOptimization):
-    """Replace `AveragePool2D` and `Reshape` with `Sum` in the following pattern.
-              │
-     ┌────────▼────────┐
-     │  AveragePool2D  │ (global kernel)                         │
-     └────────┬────────┘                                     ┌───▼───┐
-              │ (4D, channels last)                          │  Sum  │
-        ┌─────▼─────┐                                        └───┬───┘
-        │  Reshape  │                   ─────►                   │
-        └─────┬─────┘                                   ┌────────▼─────────┐
-              │ (2D, formatless)                        │  FullyConnected  ◄───── Scaled weights
-     ┌────────▼───────┐                                 └────────┬─────────┘
-     │ FullyConnected ◄───── Weights (static)                    │
-     └────────┬───────┘
-              │
-
-    This is possible if the `AveragePool2D` is pooling across the entire input (i.e. global AveragePool). In this
-    case, it is possible to use a `Sum` operator instead, and then statically divide the `weights` of the
-    `FullyConnected`. This will effectively compute the average across the input at runtime.
-    This replacement becomes useful when there is a `Reshape` between, which flattens the tensor to 2D. This
-    flattening can be done by the `Sum` operator as well (parameter `keep_dims=False`).
-    As a result, the `Reshape` must simply remove the `1`s in the spatial dimensions, and keep the `batch size` and
-    `channels` unchanged.
-    """
-
-    def __call__(self) -> bool:
-        matcher = PatternMatcher(
-            self._builder,
-            [
-                Op(["AveragePool2D"], ["x"], ["ap_out"]),
-                Op(["Reshape"], ["ap_out", ...], ["resh_out"]),
-                Op(["FullyConnected"], ["resh_out", "w", ...], ["y"]),
-            ],
-            [
-                # Require either float32, or quantized tensors.
-                RuleOr(
-                    TensorsHaveType(["w", "resh_out"], TensorType.FLOAT32),
-                    TensorsAreQuantized(["w", "resh_out"]),
-                ),
-                TensorsHaveOneConsumer(["x", "ap_out", "resh_out"]),
-                TensorIsChannelsLast("ap_out"),
-                TensorHasRank("resh_out", 2),
-                TensorIsFormatless("resh_out"),
-                TensorHasRank("w", 2),
-                TensorHasData("w"),
-                TensorDimensionsMatch(
-                    "ap_out", 0, "resh_out", 0
-                ),  # Batch size unchanged.
-                TensorDimensionsMatch(
-                    "ap_out", -1, "resh_out", -1
-                ),  # Channels unchanged.
-            ],
-        )
-
-        # The mapped operator (value) will later be added into the TFLite model, in front of the `key` operator.
-        to_add: dict[tflite_model.Operator, tflite_model.Operator] = {}
-        to_remove = []
-        for [ap, reshape, fc], tensor_map, _, _ in matcher.match_patterns():
-            x, resh_out, w = tensor_map["x"], tensor_map["resh_out"], tensor_map["w"]
-
-            kernel_shape = [ap.builtin_options.filter_h, ap.builtin_options.filter_w]
-            if kernel_shape != x.shape[1:3]:
-                continue  # Not a global average pool.
-
-            # Divide the static FullyConnected weights by the number of kernel elements. This will transform the `sums`
-            # to `averages` at runtime.
-            num_kernel_elements = np.prod(kernel_shape).astype("float32")
-            new_w = self._builder.duplicate_tensor(w)
-            if w.type == TensorType.FLOAT32:
-                # Just divide the weights.
-                new_w.tmp_buffer.data = np.array(
-                    new_w.tmp_buffer.data / num_kernel_elements
-                ).astype("float32")
-
-            elif w.quantization is not None:
-                # Divide the `scale` quantization parameter instead of the data. Since the `weights` are static,
-                # changing the `scale` will change the actual values represented by the quantized data. This is because
-                # the scale changes, while the raw data remains exactly the same.
-                new_w.quantization.scale.vector = [
-                    s / num_kernel_elements for s in new_w.quantization.scale.vector
-                ]
-
-                # Since the output of the `Sum` will now contain the `sums` of its input and not the `averages`, its
-                # `scale` quantization parameter is not ideal. Multiply the `scale` by the number of elements of the
-                # kernel to maintain the same accuracy.
-                resh_out.quantization.scale.vector = [
-                    s * num_kernel_elements for s in resh_out.quantization.scale.vector
-                ]
-
-            else:
-                # Should never happen. Raise an exception to notify us just in case.
-                logger.e(
-                    logger.Code.INTERNAL_ERROR,
-                    "ReplaceAveragePoolBeforeFullyConnectedWithSum: Unexpected type.",
-                )
-
-            fc.tmp_inputs[1] = (
-                new_w  # Replace the scaled `weights` of the `FullyConnected`.
-            )
-
-            # Reduce over the spatial dimensions.
-            axes = self._builder.create_tensor_for_data(
-                np.array([1, 2], "int32"), "axes"
-            )
-
-            sum_op = tflite_model.Operator(
-                builtin_options=Sum(keep_dims=False),
-                opcode_index=self._builder.op_code_index_for_op_type(
-                    BuiltinOperator.SUM
-                ),
-            )
-            sum_op.tmp_inputs = [x, axes]
-            sum_op.tmp_outputs = [resh_out]
-
-            to_add[fc] = sum_op
-            to_remove.extend([ap, reshape])
-
-        # Add the new `Sum` operators into the model.
-        ops = self._builder.get_operators()
-        for k, sum_op in to_add.items():
-            idx = ops.index(k)
-            ops.insert(idx, sum_op)
-
-        # Remove the `AveragePool` and `Reshape` operators from the model.
-        for op in to_remove:
-            ops.remove(op)
-
-        return len(to_remove) != 0
diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py
index fc94656ac74..0d075c2cdaa 100755
--- a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py
+++ b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py
@@ -54,9 +54,6 @@
 from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.remove_unused_tensors_and_buffers import (
     RemoveUnusedTensorsAndBuffers,
 )
-from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.replace_average_pool_before_fully_connected_with_sum import (
-    ReplaceAveragePoolBeforeFullyConnectedWithSum,
-)
 
 
 class Optimization(Enum):
@@ -83,7 +80,6 @@ class Optimization(Enum):
 
     MOVE_ACTIVATION_BEFORE_CONCAT = 15
     COMBINE_HARD_SIGMOID_AND_MUL_INTO_HARD_SWISH = 16
-    REPLACE_AVERAGE_POOL_BEFORE_FULLY_CONNECTED_WITH_SUM = 17
 
 
 class Optimizer:
@@ -164,9 +160,6 @@ def __init__(
             Optimization.COMBINE_HARD_SIGMOID_AND_MUL_INTO_HARD_SWISH: CombineHardSigmoidAndMulIntoHardSwish(
                 builder, conversion_config
             ),
-            Optimization.REPLACE_AVERAGE_POOL_BEFORE_FULLY_CONNECTED_WITH_SUM: ReplaceAveragePoolBeforeFullyConnectedWithSum(
-                builder, conversion_config
-            ),
         }
 
     def optimize(
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
index 05867a0e13c..f467b66c9d5 100644
--- a/backends/nxp/quantizer/neutron_quantizer.py
+++ b/backends/nxp/quantizer/neutron_quantizer.py
@@ -25,6 +25,7 @@
     ReluPattern,
     ReshapePattern,
     SoftMaxPattern,
+    ViewPattern,
 )
 from executorch.backends.nxp.quantizer.utils import (
     find_sequential_partitions_aten,
@@ -200,6 +201,7 @@ def __init__(self):
                 NeutronAtenQuantizer(ReluPattern(), static_qconfig),
                 NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig),
                 NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig),
+                NeutronAtenQuantizer(ViewPattern(), static_qconfig),
             ]
         )
 
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
index b71f0621002..89252f0e75d 100644
--- a/backends/nxp/quantizer/patterns.py
+++ b/backends/nxp/quantizer/patterns.py
@@ -307,6 +307,15 @@ def partition_types(self):
         return [torch.ops.aten.reshape.default]
 
 
+class ViewPattern(SharedSpecPattern):
+    """
+    Quantizer for View operator.
+    """
+
+    def partition_types(self):
+        return [torch.ops.aten.view.default]
+
+
 class SoftMaxPattern(QuantizationPattern):
     """
     Quantizer for Softmax operator.
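Note on the ReplaceAveragePoolBeforeFullyConnectedWithSum pass removed above: it relied on the identity mean(x) @ W.T == sum(x) @ (W / N).T, where N is the number of elements in the global pooling window. Dividing the static FullyConnected weights by N lets a Sum with keep_dims=False (which also performs the 2D flattening done by the Reshape) reproduce the AveragePool2D + Reshape + FullyConnected result. A minimal NumPy sketch of that equivalence, with purely illustrative shapes and names that are not taken from the backend:

import numpy as np

# Illustrative sizes: batch 1, 4 channels, a "global" 16x16 pooling window.
batch, h, w, channels = 1, 16, 16, 4
n = h * w  # number of elements averaged by the global AveragePool2D

x = np.random.rand(batch, h, w, channels).astype(np.float32)  # channels-last input
weights = np.random.rand(8, channels).astype(np.float32)  # static FullyConnected weights

# Original pattern: global average pool -> reshape to 2D -> FullyConnected.
out_avg = x.mean(axis=(1, 2)) @ weights.T

# Replacement: Sum over the spatial axes (keep_dims=False already yields a 2D tensor),
# with the FullyConnected weights statically divided by the window size.
out_sum = x.sum(axis=(1, 2)) @ (weights / n).T

assert np.allclose(out_avg, out_sum, atol=1e-5)

For quantized weights the pass applied the same division to the weight scale instead of the raw data, and multiplied the scale of the Sum output by N so the quantized range covers sums rather than averages, as described in the comments of the removed file.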
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
index d6030ebae7f..758fe8bc001 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py
@@ -7,13 +7,7 @@
 import pytest
 import torch
 
-from executorch.backends.nxp.backend.edge_program_converter import (
-    EdgeProgramToIRConverter,
-)
-from executorch.backends.nxp.tests.executorch_pipeline import (
-    to_edge_program,
-    to_quantized_edge_program,
-)
+from executorch.backends.nxp.tests.executorch_pipeline import to_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
     ToNCHWPreprocess,
@@ -22,9 +16,7 @@
 from executorch.backends.nxp.tests.models import (
     ConstantPadNDConvModule,
     ConstantPadNDModule,
-    Conv2dConstantPadNDModule,
 )
-from torch.export import ExportedProgram
 
 
 @pytest.fixture(autouse=True)
@@ -47,37 +39,6 @@ def test_constant_pad_nd_conversion__specific_constant(constant):
     convert_run_compare(edge_program, input_data)
 
 
-@pytest.mark.parametrize("constant", [0.0, 67.28, 42.0, -13.37])
-@pytest.mark.skip(reason="Neutron Converter does not fully convert for NPU")
-def test_constant_pad_nd_quant_conversion__specific_constant(mocker, constant):
-    input_shape = (2, 4, 12, 12)
-    paddings = (2, 2, 2, 2)
-
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
-
-    # Run conversion
-    _ = to_quantized_edge_program(
-        Conv2dConstantPadNDModule(paddings, constant), input_shape
-    )
-
-    # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
-
-    # Capture converted program
-    edge_program: ExportedProgram = converter_spy.call_args.args[1]
-
-    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
-
-    convert_run_compare(
-        edge_program,
-        input_data,
-        tfl_model=tflite_flatbuffers_model,
-        atol=1.0,
-        tflite_input_preprocess=ToNHWCPreprocess(),
-        tflite_output_preprocess=ToNCHWPreprocess(),
-    )
-
-
 def test_constant_pad_nd_conversion__default_constant():
     input_shape = [2, 4, 6, 8]
     paddings = [1, 2, 3, 4]
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
index 9863c8acc41..7140d1f398b 100644
--- a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
+++ b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py
@@ -89,6 +89,24 @@ def forward(self, x):
         return x
 
 
+class ConvLinearViewModule(torch.nn.Module):
+    def __init__(self, channels: int, channels_view_out: int):
+        super().__init__()
+        self.conv = nn.Conv2d(channels, channels, 3, 2)
+        self.linear = nn.Linear(channels_view_out, 32, bias=True)
+        self.channels_view_out = channels_view_out
+        self.avg_pool = nn.AvgPool2d(1)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.relu(x)
+        x = self.avg_pool(x)
+        x = x.view(-1, self.channels_view_out)
+        x = self.linear(x)
+        return x
+
+
 def test__channels_first_to_2d(mocker):
     input_shape = [2, 4, 7, 9]
     new_shape = [12, 32]  # Mix up the dimensions for a thorough test.
@@ -205,19 +223,20 @@ def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape):
 
 
 @pytest.mark.parametrize(
-    "input_shape, new_shape",
+    "input_shape, channels_view_out",
     [
-        pytest.param((1, 4, 16, 16), (50, 18), id="4D, batch_size=1"),
-        pytest.param((10, 4, 16, 16), (500, 18), id="4D, , batch_size=10"),
+        pytest.param((1, 4, 16, 16), 196, id="4D"),
     ],
 )
-@pytest.mark.skip(reason="Neutron Converter does not fully convert for NPU")
-def test_view_copy_w_conv_quant_conversion(mocker, input_shape, new_shape):
+def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_out):
     converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
 
     # Run conversion
     _ = to_quantized_edge_program(
-        ConvReshapeModule(channels=input_shape[1], new_shape=new_shape), input_shape
+        ConvLinearViewModule(
+            channels=input_shape[1], channels_view_out=channels_view_out
+        ),
+        input_shape,
     )
 
     # Capture generated model
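In the new test above, channels_view_out=196 follows from the shapes in ConvLinearViewModule for the parametrised input (1, 4, 16, 16): Conv2d(4, 4, kernel_size=3, stride=2) maps 16x16 to floor((16 - 3) / 2) + 1 = 7, AvgPool2d(1) keeps the shape, so view(-1, 4 * 7 * 7) flattens (1, 4, 7, 7) into (1, 196) for the Linear(196, 32) layer. A standalone sanity-check sketch (not part of the patch):

import torch
from torch import nn

conv = nn.Conv2d(4, 4, 3, 2)  # same convolution as in ConvLinearViewModule
x = torch.randn(1, 4, 16, 16)  # the parametrised input shape
y = conv(x)
assert y.shape == (1, 4, 7, 7)  # floor((16 - 3) / 2) + 1 = 7
assert y.flatten(1).shape[1] == 196  # matches channels_view_out=196

Because ViewPattern is a SharedSpecPattern like the existing ReshapePattern, the aten.view node presumably reuses its input's quantization parameters, which is what allows the Conv -> ReLU -> AvgPool -> view -> Linear chain to be quantized end to end here instead of being skipped as the old reshape-based test was.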