Fix mac computation (#1363)

irenaby · web-flow · commit eb96b71abf2d · 2025-02-24T20:00:45.000+02:00
* fix MAC computation
* fix code accessing tpc.operator_set in case it's None
diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py
@@ -440,18 +440,28 @@ def has_any_weight_attr_to_quantize(self) -> bool:
 
         return any([self.is_weights_quantization_enabled(attr) for attr in self.get_node_weights_attributes()])
 
-    def get_total_output_params(self) -> float:
+    # TODO it makes more sense to standardize the input/output shapes at node creation.
+    def get_output_shapes_list(self) -> List[tuple]:
         """
-        Calculates the output size of the node.
+        Return output shape in a standardized form as a list of tuples.
 
-        Returns: Output size.
+        Returns:
+            A list of output shape tuples.
         """
         # shape can be tuple or list, and multiple shapes can be packed in list or tuple
         if self.output_shape and isinstance(self.output_shape[0], (tuple, list)):
-            output_shapes = self.output_shape
+            output_shapes = [tuple(s) for s in self.output_shape]
         else:
-            output_shapes = [self.output_shape]
+            output_shapes = [tuple(self.output_shape)]
+        return output_shapes
+
+    def get_total_output_params(self) -> float:
+        """
+        Calculates the output size of the node.
 
+        Returns: Output size.
+        """
+        output_shapes = self.get_output_shapes_list()
         # remove batch size (first element) from output shape
         output_shapes = [s[1:] for s in output_shapes]
         # for scalar shape (None,) prod returns 1
@@ -550,7 +560,7 @@ def has_activation_quantization_enabled_candidate(self) -> bool:
         """
 
         return len(self.candidates_quantization_cfg) > 0 and \
-               any([c.activation_quantization_cfg.enable_activation_quantization for c in self.candidates_quantization_cfg])
+            any([c.activation_quantization_cfg.enable_activation_quantization for c in self.candidates_quantization_cfg])
 
     def get_all_weights_attr_candidates(self, attr: str) -> List[WeightsAttrQuantizationConfig]:
         """
diff --git a/model_compression_toolkit/core/keras/keras_implementation.py b/model_compression_toolkit/core/keras/keras_implementation.py
@@ -565,28 +565,24 @@ def get_node_mac_operations(self,
 
         Returns: The MAC count og the operation
         """
-
-        output_shape = node.output_shape
-        kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
-        output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
-
-        if node.is_match_type(Conv2D) or node.is_match_type(Conv2DTranspose):
-            # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
-            return np.prod([x for x in output_shape if x is not None]) * \
-                   kernel_shape[input_channel_axis] * \
-                   (kernel_shape[0] * kernel_shape[1])
-        elif node.is_match_type(DepthwiseConv2D):
-            # Depth * (W_out * H_out) * C_in * (W_kernel * H_kernel)
-            return node.framework_attr.get(DEPTH_MULTIPLIER) * \
-                   np.prod([x for x in output_shape if x is not None]) / output_shape[output_channel_axis] * \
-                   kernel_shape[input_channel_axis] * \
-                   (kernel_shape[0] * kernel_shape[1])
-        elif node.is_match_type(Dense):
-            # IN * OUT
-            return kernel_shape[0] * kernel_shape[1]
-        else:
+        kernels = fw_info.get_kernel_op_attributes(node.type)
+        if not kernels or kernels[0] is None:
             return 0
 
+        assert len(kernels) == 1
+        kernel_shape = node.get_weights_by_keys(kernels[0]).shape
+
+        if node.is_match_type(Conv2D) or node.is_match_type(Conv2DTranspose) or node.is_match_type(DepthwiseConv2D):
+            h, w = node.get_output_shapes_list()[0][-3:-1]
+            return np.prod(kernel_shape) * h * w
+
+        if node.is_match_type(Dense):
+            # IN * OUT * (product of the remaining non-batch output dims)
+            _, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
+            return node.get_total_output_params() * kernel_shape[input_channel_axis]
+
+        return 0
+
     def apply_second_moment_correction(self,
                                        quantized_model: Any,
                                        core_config: CoreConfig,
diff --git a/model_compression_toolkit/core/pytorch/pytorch_implementation.py b/model_compression_toolkit/core/pytorch/pytorch_implementation.py
@@ -506,21 +506,23 @@ def get_node_mac_operations(self,
 
         Returns: The MAC count of the operation
         """
+        kernels = fw_info.get_kernel_op_attributes(node.type)
+        if not kernels or kernels[0] is None:
+            return 0
 
-        output_shape = node.output_shape[0]
-        kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
-        output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
+        assert len(kernels) == 1
+        kernel_shape = node.get_weights_by_keys(kernels[0]).shape
 
         if node.is_match_type(Conv2d) or node.is_match_type(ConvTranspose2d):
-            # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
-            return np.prod([x for x in output_shape if x is not None]) * \
-                   kernel_shape[input_channel_axis] * \
-                   (kernel_shape[0] * kernel_shape[1])
-        elif node.is_match_type(Linear):
-            # IN * OUT
-            return kernel_shape[0] * kernel_shape[1]
-        else:
-            return 0
+            h, w = node.get_output_shapes_list()[0][-2:]
+            return np.prod(kernel_shape) * h * w
+
+        if node.is_match_type(Linear):
+            # IN * OUT * (product of the remaining non-batch output dims)
+            _, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
+            return node.get_total_output_params() * kernel_shape[input_channel_axis]
+
+        return 0
 
     def apply_second_moment_correction(self,
                                        quantized_model: Any,
diff --git a/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2fw.py b/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2fw.py
@@ -39,9 +39,9 @@ def attach(self, tpc_model: TargetPlatformCapabilities,
 
         tpc = FrameworkQuantizationCapabilities(tpc_model)
         custom_opset2layer = custom_opset2layer if custom_opset2layer is not None else {}
-
+        operator_set = tpc_model.operator_set or ()
         with tpc:
-            for opset in tpc_model.operator_set:
+            for opset in operator_set:
                 if isinstance(opset, OperatorsSet):  # filter out OperatorsSetConcat
                     if opset.name in custom_opset2layer:
                         custom_opset_layers = custom_opset2layer[opset.name]
diff --git a/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py b/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py
@@ -52,7 +52,8 @@ def __init__(self,
         self.op_sets_to_layers = OperationsToLayers() # Init an empty OperationsToLayers
         self.layer2qco, self.filterlayer2qco = {}, {} # Init empty mappings from layers/LayerFilterParams to QC options
         # Track the unused opsets for warning purposes.
-        self.__tpc_opsets_not_used = [s.name for s in tpc.operator_set]
+        operator_set = tpc.operator_set or ()
+        self.__tpc_opsets_not_used = [s.name for s in operator_set]
         self.remove_fusing_names_from_not_used_list()
 
     def get_layers_by_opset_name(self, opset_name: str) -> List[Any]:
diff --git a/tests_pytest/conftest.py b/tests_pytest/conftest.py
@@ -0,0 +1,71 @@
+# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from mct_quantizers import QuantizationMethod
+from unittest.mock import Mock
+
+from pytest import fixture
+
+from model_compression_toolkit.core import FrameworkInfo, QuantizationConfig
+from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.target_platform_capabilities import OpQuantizationConfig, Signedness, \
+    QuantizationConfigOptions, TargetPlatformCapabilities
+
+
+@fixture
+def default_op_quant_cfg():
+    return OpQuantizationConfig(
+        default_weight_attr_config={},
+        attr_weights_configs_mapping={},
+        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
+        activation_n_bits=8,
+        supported_input_activation_n_bits=[8],
+        enable_activation_quantization=True,
+        quantization_preserving=False,
+        fixed_scale=None,
+        fixed_zero_point=None,
+        simd_size=32,
+        signedness=Signedness.AUTO)
+
+
+@fixture
+def default_quant_cfg_options(default_op_quant_cfg):
+    return QuantizationConfigOptions(quantization_configurations=[default_op_quant_cfg])
+
+
+@fixture
+def minimal_tpc(default_quant_cfg_options):
+    return TargetPlatformCapabilities(default_qco=default_quant_cfg_options,
+                                      tpc_platform_type='test',
+                                      operator_set=None,
+                                      fusing_patterns=None)
+
+
+@fixture
+def graph_mock():
+    """ Basic Graph mock. """
+    return Mock(spec_set=Graph, nodes=[])
+
+
+@fixture
+def fw_impl_mock():
+    """ Basic FrameworkImplementation mock. """
+    return Mock(spec_set=FrameworkImplementation)
+
+
+@fixture
+def fw_info_mock():
+    """ Basic FrameworkInfo mock. """
+    return Mock(spec_set=FrameworkInfo)
diff --git a/tests_pytest/keras/core/test_fw_implementation.py b/tests_pytest/keras/core/test_fw_implementation.py
@@ -0,0 +1,78 @@
+# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import numpy as np
+from keras.layers import Conv2D, Conv2DTranspose, DepthwiseConv2D, Dense, Input, Flatten
+import keras
+
+from model_compression_toolkit.core import QuantizationConfig
+from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
+from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
+from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2keras import \
+    AttachTpcToKeras
+
+
+def data_gen():
+    yield [np.random.randn(1, 28, 32, 10)]
+
+
+def build_model():
+    x = Input(shape=(28, 32, 10))
+    y = Conv2D(filters=20, kernel_size=(5, 4))(x)
+    y = Conv2D(filters=15, kernel_size=(4, 6), groups=5)(y)
+    y = Conv2D(filters=8, kernel_size=(3, 3), strides=2)(y)
+    y = Conv2D(filters=12, kernel_size=(3, 3), dilation_rate=2)(y)
+    y = Conv2DTranspose(filters=20, kernel_size=(5, 3))(y)
+    y = Conv2DTranspose(filters=10, kernel_size=(3, 3), strides=2)(y)
+    y = Conv2DTranspose(filters=5, kernel_size=(3, 3), dilation_rate=2)(y)
+    y = DepthwiseConv2D(kernel_size=(2, 3), depth_multiplier=4)(y)
+    y = DepthwiseConv2D(kernel_size=(3, 3), depth_multiplier=2, strides=3)(y)
+    y = DepthwiseConv2D(kernel_size=(3, 3), depth_multiplier=2, dilation_rate=2)(y)
+    y = Dense(10)(y)  # 4d input
+    y = Flatten()(y)
+    y = Dense(5)(y)  # 2d (vector) input
+    return keras.Model(inputs=x, outputs=y)
+
+
+def test_get_mac(minimal_tpc):
+    fw_impl = KerasImplementation()
+    model = build_model()
+    fw_info = DEFAULT_KERAS_INFO
+
+    graph = graph_preparation_runner(model,
+                                     data_gen,
+                                     QuantizationConfig(linear_collapsing=False),
+                                     fw_info=fw_info,
+                                     fw_impl=fw_impl,
+                                     fqc=AttachTpcToKeras().attach(minimal_tpc),
+                                     mixed_precision_enable=False,
+                                     running_gptq=False)
+
+    nodes = graph.get_topo_sorted_nodes()
+    assert len(nodes) == 14, nodes
+    assert fw_impl.get_node_mac_operations(nodes[0], fw_info) == 0
+    assert fw_impl.get_node_mac_operations(nodes[1], fw_info) == (10*20*5*4)*24*29
+    assert fw_impl.get_node_mac_operations(nodes[2], fw_info) == (4*3*4*6)*5*21*24
+    assert fw_impl.get_node_mac_operations(nodes[3], fw_info) == (15*8*3*3)*10*11
+    assert fw_impl.get_node_mac_operations(nodes[4], fw_info) == (8*12*3*3)*6*7
+    assert fw_impl.get_node_mac_operations(nodes[5], fw_info) == (12*20*5*3)*10*9
+    assert fw_impl.get_node_mac_operations(nodes[6], fw_info) == (20*10*3*3)*21*19
+    assert fw_impl.get_node_mac_operations(nodes[7], fw_info) == (10*5*3*3)*25*23
+    assert fw_impl.get_node_mac_operations(nodes[8], fw_info) == (5*2*3*4)*24*21
+    assert fw_impl.get_node_mac_operations(nodes[9], fw_info) == (10*3*3*4)*8*7
+    assert fw_impl.get_node_mac_operations(nodes[10], fw_info) == (40*3*3*2)*4*3
+    assert fw_impl.get_node_mac_operations(nodes[11], fw_info) == 4*3*(80*10)
+    assert fw_impl.get_node_mac_operations(nodes[12], fw_info) == 0
+    assert fw_impl.get_node_mac_operations(nodes[13], fw_info) == (4*3*10)*5
diff --git a/tests_pytest/pytorch/core/test_fw_implementation.py b/tests_pytest/pytorch/core/test_fw_implementation.py
@@ -0,0 +1,100 @@
+# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch
+from torch import nn
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.attach2pytorch import \
+    AttachTpcToPytorch
+
+from model_compression_toolkit.core import QuantizationConfig
+from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
+from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
+from model_compression_toolkit.core.pytorch.pytorch_implementation import PytorchImplementation
+
+
+def data_gen():
+    yield [torch.rand(1, 10, 28, 32)]
+
+
+class Model(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(10, 20, kernel_size=(5, 4))
+        self.conv2 = nn.Conv2d(20, 15, kernel_size=(4, 6), groups=5)
+        self.conv3 = nn.Conv2d(15, 8, kernel_size=(3, 3), stride=2)
+        self.conv4 = nn.Conv2d(8, 12, kernel_size=(3, 3), dilation=2)
+        self.convtr1 = nn.ConvTranspose2d(12, 20, kernel_size=(5, 3))
+        self.convtr2 = nn.ConvTranspose2d(20, 10, kernel_size=(3, 3), stride=2)
+        self.convtr3 = nn.ConvTranspose2d(10, 5, kernel_size=(3, 3), dilation=2)
+        self.dwconv1 = nn.Conv2d(5, 20, kernel_size=(2, 3), groups=5)
+        self.dwconv2 = nn.Conv2d(20, 40, kernel_size=(3, 3), groups=20, stride=3)
+        self.dwconv3 = nn.Conv2d(40, 80, kernel_size=(3, 3), groups=40, dilation=2)
+        self.fc1 = nn.Linear(80, 10)
+        self.flatten = nn.Flatten()
+        self.fc2 = nn.Linear(120, 5)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.convtr1(x)
+        x = self.convtr2(x)
+        x = self.convtr3(x)
+        x = self.dwconv1(x)
+        x = self.dwconv2(x)
+        x = self.dwconv3(x)
+        x = torch.permute(x, [0, 2, 3, 1])
+        x = self.fc1(x)
+        x = self.flatten(x)
+        x = self.fc2(x)
+        return x
+
+
+def test_get_mac(minimal_tpc):
+    Model()(next(data_gen())[0])
+
+    fw_impl = PytorchImplementation()
+    fw_info = DEFAULT_PYTORCH_INFO
+    model = Model()
+
+    graph = graph_preparation_runner(model,
+                                     data_gen,
+                                     QuantizationConfig(linear_collapsing=False),
+                                     fw_info=fw_info,
+                                     fw_impl=fw_impl,
+                                     fqc=AttachTpcToPytorch().attach(minimal_tpc),
+                                     mixed_precision_enable=False,
+                                     running_gptq=False)
+
+    nodes = graph.get_topo_sorted_nodes()
+    # assert len(nodes) == 14, nodes
+    assert fw_impl.get_node_mac_operations(nodes[0], fw_info) == 0
+    assert fw_impl.get_node_mac_operations(nodes[1], fw_info) == (10*20*5*4)*24*29
+    assert fw_impl.get_node_mac_operations(nodes[2], fw_info) == (4*3*4*6)*5*21*24
+    assert fw_impl.get_node_mac_operations(nodes[3], fw_info) == (15*8*3*3)*10*11
+    assert fw_impl.get_node_mac_operations(nodes[4], fw_info) == (8*12*3*3)*6*7
+    assert fw_impl.get_node_mac_operations(nodes[5], fw_info) == (12*20*5*3)*10*9
+    assert fw_impl.get_node_mac_operations(nodes[6], fw_info) == (20*10*3*3)*21*19
+    assert fw_impl.get_node_mac_operations(nodes[7], fw_info) == (10*5*3*3)*25*23
+    assert fw_impl.get_node_mac_operations(nodes[8], fw_info) == (5*2*3*4)*24*21
+    assert fw_impl.get_node_mac_operations(nodes[9], fw_info) == (10*3*3*4)*8*7
+    assert fw_impl.get_node_mac_operations(nodes[10], fw_info) == (40*3*3*2)*4*3
+    assert fw_impl.get_node_mac_operations(nodes[10], fw_info) == (40*3*3*2)*4*3
+    assert fw_impl.get_node_mac_operations(nodes[11], fw_info) == 0
+    assert fw_impl.get_node_mac_operations(nodes[12], fw_info) == 4*3*(80*10)
+    assert fw_impl.get_node_mac_operations(nodes[13], fw_info) == 0
+    assert fw_impl.get_node_mac_operations(nodes[14], fw_info) == (4*3*10)*5
+
+