diff --git a/model_compression_toolkit/constants.py b/model_compression_toolkit/constants.py index 59f790936..03f58bfa6 100644 --- a/model_compression_toolkit/constants.py +++ b/model_compression_toolkit/constants.py @@ -142,4 +142,4 @@ # Fusing Patterns constants FUSED_LAYER_PATTERN = 'fused_layer_pattern' -FUSED_OP_QUANT_CONFIG = 'fused_op_quantization_config' \ No newline at end of file +FUSE_OP_QUANT_CONFIG = 'fuse_op_quantization_config' diff --git a/model_compression_toolkit/core/common/fusion/fusing_info.py b/model_compression_toolkit/core/common/fusion/fusing_info.py index f0aae3f64..ceaebb2da 100644 --- a/model_compression_toolkit/core/common/fusion/fusing_info.py +++ b/model_compression_toolkit/core/common/fusion/fusing_info.py @@ -15,7 +15,7 @@ from model_compression_toolkit.target_platform_capabilities import LayerFilterParams from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import OpQuantizationConfig -from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG from dataclasses import dataclass, field from typing import Optional, List, Dict, Any, Tuple @@ -131,7 +131,7 @@ def set_fused_op_quantization_config(self, op_id: str, nodes: Tuple['BaseNode']) """ fusing_pattern = next((fp for fp in self.fusing_patterns if is_valid_fusion([fp.get(FUSED_LAYER_PATTERN)], nodes)), None) if fusing_pattern is not None: - self.fused_op_id_to_quant_config[op_id] = fusing_pattern.get(FUSED_OP_QUANT_CONFIG) + self.fused_op_id_to_quant_config[op_id] = fusing_pattern.get(FUSE_OP_QUANT_CONFIG) def remove_fused_operation(self, op_id: str) -> None: """ diff --git a/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py b/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py index 89abdf27d..1814dbcaa 100644 --- 
a/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +++ b/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py @@ -33,7 +33,7 @@ from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder from model_compression_toolkit.core.pytorch.utils import to_torch_tensor from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER -from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder +from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder def _build_input_tensors_list(node: BaseNode, @@ -347,6 +347,12 @@ def _add_modules(self, reused_nodes_only=False): holder_type=PytorchPreservingActivationQuantizationHolder, **holder_kwargs) + elif node.is_fln_quantization(): + holder_kwargs = {'quantization_bypass': True} + activation_quantizer_holder = self.get_activation_quantizer_holder(node, + holder_type=PytorchFLNActivationQuantizationHolder, + **holder_kwargs) + if activation_quantizer_holder is not None: activation_quantizer_holder_name = node.name + '_' + ACTIVATION_HOLDER_QUANTIZER self.add_module(activation_quantizer_holder_name, activation_quantizer_holder) diff --git a/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py b/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py index fdda9f837..f991810fe 100644 --- a/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py +++ b/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py @@ -42,7 +42,7 @@ def get_inferable_quantizers(node: BaseNode, weight_quantizer = get_weights_quantizer_for_node(node, attr) weight_quantizers[attr] = weight_quantizer - if node.is_activation_quantization_enabled(): + 
if node.is_activation_quantization_enabled() or node.is_fln_quantization(): num_of_outputs = len(node.output_shape) if isinstance(node.output_shape, list) else 1 activation_quantizers = [get_activations_quantizer_for_node(node)] * num_of_outputs diff --git a/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py b/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py index be7e59ec7..1064072de 100644 --- a/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py +++ b/model_compression_toolkit/target_platform_capabilities/targetplatform2framework/framework_quantization_capabilities.py @@ -31,7 +31,7 @@ OpQuantizationConfig, QuantizationConfigOptions from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.current_tpc import _current_tpc -from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG class FrameworkQuantizationCapabilities(ImmutableClass): @@ -113,8 +113,8 @@ def get_fusing_patterns(self) -> List[Dict[List[Any], OpQuantizationConfig]]: ops = [self.get_layers_by_opset(x) for x in p.operator_groups] res.extend(itertools.product(*ops)) - fused_op_quant_config = getattr(p, FUSED_OP_QUANT_CONFIG, None) - patterns.extend({FUSED_LAYER_PATTERN: list(x), FUSED_OP_QUANT_CONFIG: fused_op_quant_config} for x in res) + fuse_op_quant_config = getattr(p, FUSE_OP_QUANT_CONFIG, None) + patterns.extend({FUSED_LAYER_PATTERN: list(x), FUSE_OP_QUANT_CONFIG: fuse_op_quant_config} for x in res) return patterns diff --git a/tests/external_tests/keras_tests/models_tests/test_networks_runner.py b/tests/external_tests/keras_tests/models_tests/test_networks_runner.py index 58d054997..4ea3a4261 100644 --- 
a/tests/external_tests/keras_tests/models_tests/test_networks_runner.py +++ b/tests/external_tests/keras_tests/models_tests/test_networks_runner.py @@ -97,15 +97,21 @@ def compare(self, inputs_list, quantized_model, qc, tpc): self.unit_test.assertTrue(False, f'fail TFLite convertion with the following error: {error_msg}') def run_network(self, inputs_list, qc, tpc): + from model_compression_toolkit.gptq.common.gptq_config import GradualActivationQuantizationConfig def representative_data_gen(): for _ in range(self.num_calibration_iter): yield inputs_list core_config = mct.core.CoreConfig(quantization_config=qc) if self.gptq: - arc = mct.gptq.GradientPTQConfig(n_epochs=2, optimizer=tf.keras.optimizers.Adam( - learning_rate=0.0001), optimizer_rest=tf.keras.optimizers.Adam( - learning_rate=0.0001), loss=multiple_tensors_mse_loss) + arc = mct.gptq.GradientPTQConfig(n_epochs=2, + optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), + optimizer_rest=tf.keras.optimizers.Adam(learning_rate=0.0001), + loss=multiple_tensors_mse_loss, + train_bias=True, + hessian_weights_config=None, + gradual_activation_quantization_config= GradualActivationQuantizationConfig(), + regularization_factor=1) ptq_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization( self.model_float, diff --git a/tests/keras_tests/non_parallel_tests/test_keras_tpc.py b/tests/keras_tests/non_parallel_tests/test_keras_tpc.py index bc2304adb..c39965616 100644 --- a/tests/keras_tests/non_parallel_tests/test_keras_tpc.py +++ b/tests/keras_tests/non_parallel_tests/test_keras_tpc.py @@ -41,7 +41,7 @@ from keras import Input import model_compression_toolkit as mct -from model_compression_toolkit.constants import TENSORFLOW, FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import TENSORFLOW, FUSED_LAYER_PATTERN from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL, IMX500_TP_MODEL, \ QNNPACK_TP_MODEL, 
TFLITE_TP_MODEL, KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS, WEIGHTS_N_BITS from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py index bc51c6908..b377e6eb2 100644 --- a/tests_pytest/common_tests/unit_tests/core/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py +++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/resource_utilization_tools/test_resource_utilization_calculator.py @@ -20,7 +20,7 @@ import pytest from model_compression_toolkit.core.common.graph.base_graph import OutTensor -from model_compression_toolkit.constants import FLOAT_BITWIDTH, FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import FLOAT_BITWIDTH, FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG from model_compression_toolkit.core import ResourceUtilization from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfo @@ -574,7 +574,7 @@ def test_compute_cuts_random_fusion_valid_utilization(self, seed, disable_quanti if i + fuse_len <= num_nodes: fused = tuple(nodes[j] for j in range(i, i + fuse_len)) fused_name = f"FusedNode_{'_'.join(n.name for n in fused)}" - fused_pattern = {FUSED_LAYER_PATTERN: [n.layer_class for n in fused], FUSED_OP_QUANT_CONFIG: None} + fused_pattern = {FUSED_LAYER_PATTERN: [n.layer_class for n in fused], FUSE_OP_QUANT_CONFIG: None} fused_patterns.append(fused_pattern) fused_data[fused_name] = fused i += fuse_len diff --git a/tests_pytest/common_tests/unit_tests/core/test_fusion_info.py b/tests_pytest/common_tests/unit_tests/core/test_fusion_info.py index 5e02c22bb..4f91d68e2 100644 --- 
a/tests_pytest/common_tests/unit_tests/core/test_fusion_info.py +++ b/tests_pytest/common_tests/unit_tests/core/test_fusion_info.py @@ -19,7 +19,7 @@ from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfoGenerator, FUSED_OP_ID_PREFIX, FusingInfo from model_compression_toolkit.target_platform_capabilities import FrameworkQuantizationCapabilities from model_compression_toolkit.core.common import BaseNode -from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG from mct_quantizers import QuantizationMethod from tests.common_tests.helpers.generate_test_tpc import generate_test_attr_configs, generate_test_op_qc @@ -51,8 +51,8 @@ def fusing_patterns(): """ - Returns predefined fusing patterns: Conv2D + ReLU and Linear + Softmax. """ - return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSED_OP_QUANT_CONFIG: None}] + return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSE_OP_QUANT_CONFIG: None}] @pytest.fixture @@ -249,10 +249,10 @@ def fusing_patterns_with_qconfig(): """ - Returns predefined fusing patterns: Conv2D + ReLU and Conv2D + Tanh, Linear + Softmax. 
""" - return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSED_OP_QUANT_CONFIG: TEST_QC_1}, - {FUSED_LAYER_PATTERN: ["Conv2d", "Tanh"], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: ["Conv2d", "BatchNorm2d", "ReLU6"], FUSED_OP_QUANT_CONFIG: TEST_QC_2}, - {FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSED_OP_QUANT_CONFIG: TEST_QC_3 }] + return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSE_OP_QUANT_CONFIG: TEST_QC_1}, + {FUSED_LAYER_PATTERN: ["Conv2d", "Tanh"], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: ["Conv2d", "BatchNorm2d", "ReLU6"], FUSE_OP_QUANT_CONFIG: TEST_QC_2}, + {FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSE_OP_QUANT_CONFIG: TEST_QC_3 }] @pytest.fixture def fusing_info_generator_with_qconfig(fusing_patterns_with_qconfig): diff --git a/tests_pytest/keras_tests/integration_tests/core/fusion/test_fusing_info_generator_keras.py b/tests_pytest/keras_tests/integration_tests/core/fusion/test_fusing_info_generator_keras.py index c3902160e..5bed7a170 100644 --- a/tests_pytest/keras_tests/integration_tests/core/fusion/test_fusing_info_generator_keras.py +++ b/tests_pytest/keras_tests/integration_tests/core/fusion/test_fusing_info_generator_keras.py @@ -23,7 +23,7 @@ from tests_pytest._test_util.graph_builder_utils import build_node from tests_pytest.keras_tests.keras_test_util.keras_test_mixin import KerasFwMixin import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema -from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG from tensorflow.keras import backend as K @@ -53,7 +53,7 @@ class TestFusingConvRelu(BaseTestFusingInfoGeneratorKeras): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -96,7 +96,7 @@ class 
TestFusingAnyActKeras(BaseTestFusingInfoGeneratorKeras): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -153,7 +153,7 @@ class TestFusingConvReLUOnlyKeras(BaseTestFusingInfoGeneratorKeras): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -220,12 +220,12 @@ class TestFusingComplexPatternsKeras(BaseTestFusingInfoGeneratorKeras): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -313,7 +313,7 @@ class TestFusingConvSwishWithMultiSuccessorsKeras(BaseTestFusingInfoGeneratorKer ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -360,7 +360,7 @@ class 
TestFusingConvReluWithMultiPredecessorsKeras(BaseTestFusingInfoGeneratorKe ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( diff --git a/tests_pytest/pytorch_tests/e2e_tests/test_fln_quantization_holder.py b/tests_pytest/pytorch_tests/e2e_tests/test_fln_quantization_holder.py new file mode 100644 index 000000000..a88ff07b2 --- /dev/null +++ b/tests_pytest/pytorch_tests/e2e_tests/test_fln_quantization_holder.py @@ -0,0 +1,142 @@ +# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +import model_compression_toolkit as mct +import torch +from mct_quantizers import PytorchActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder + +from tests_pytest._test_util.tpc_util import configure_mp_activation_opsets +from model_compression_toolkit.target_platform_capabilities.schema.v2 import QuantizationMethod, AttributeQuantizationConfig, \ + OpQuantizationConfig, QuantizationConfigOptions, Signedness, OperatorSetNames, TargetPlatformCapabilities, Fusing, OperatorsSet +from tests.common_tests.helpers.generate_test_tpc import generate_test_attr_configs, generate_test_op_qc + + +def build_tpc(): + default_op_cfg = OpQuantizationConfig( + default_weight_attr_config=AttributeQuantizationConfig(), + attr_weights_configs_mapping={}, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + activation_n_bits=8, + supported_input_activation_n_bits=[8], + enable_activation_quantization=True, + enable_weights_quantization=True, + quantization_preserving=False, + fixed_scale=None, + fixed_zero_point=None, + simd_size=32, + signedness=Signedness.AUTO + ) + + opsets, _ = configure_mp_activation_opsets( + opset_names=[OperatorSetNames.CONV, + OperatorSetNames.RELU, + OperatorSetNames.SIGMOID, + OperatorSetNames.FULLY_CONNECTED, + OperatorSetNames.HARDSWISH], + base_op_config=default_op_cfg, + a_nbits=[8] + ) + default_cfg = QuantizationConfigOptions(quantization_configurations=[default_op_cfg]) + + test_qc = generate_test_op_qc(**generate_test_attr_configs(), activation_n_bits=16) + + tpc = TargetPlatformCapabilities( + default_qco=default_cfg, + operator_set=opsets, + fusing_patterns=[ + Fusing(operator_groups=( + OperatorsSet(name=OperatorSetNames.CONV), + OperatorsSet(name=OperatorSetNames.RELU)), fuse_op_quantization_config=test_qc), + Fusing(operator_groups=( + OperatorsSet(name=OperatorSetNames.CONV), + OperatorsSet(name=OperatorSetNames.SIGMOID))), + 
Fusing(operator_groups=( + OperatorsSet(name=OperatorSetNames.FULLY_CONNECTED), + OperatorsSet(name=OperatorSetNames.HARDSWISH)), fuse_op_quantization_config=test_qc), + ] + ) + return tpc + +def representative_data_gen(shape=(3, 8, 8), num_inputs=1, batch_size=2, num_iter=1): + for _ in range(num_iter): + yield [torch.randn(batch_size, *shape)] * num_inputs + +def get_float_model(): + class BaseModel(torch.nn.Module): + def __init__(self): + super().__init__() + self.conv1 = torch.nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3) + self.relu = torch.nn.ReLU() + self.conv2 = torch.nn.Conv2d(in_channels=3, out_channels=3, kernel_size=3) + self.sigmoid = torch.nn.Sigmoid() + self.flatten = torch.nn.Flatten() + self.fc = torch.nn.Linear(in_features=48, out_features=10) + self.hswish = torch.nn.Hardswish() + + def forward(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.sigmoid(x) + x = self.flatten(x) + x = self.fc(x) + x = self.hswish(x) + return x + return BaseModel() + +def test_fln_quantization_holder(): + + float_model = get_float_model() + tpc = build_tpc() + + quantized_model, _ = mct.ptq.pytorch_post_training_quantization( + in_module=float_model, + representative_data_gen=representative_data_gen, + target_platform_capabilities=tpc + ) + + # check conv1 + assert hasattr(quantized_model, 'conv1_activation_holder_quantizer') + conv1_activation_holder_quantizer = quantized_model.conv1_activation_holder_quantizer + assert isinstance(conv1_activation_holder_quantizer, PytorchFLNActivationQuantizationHolder) + assert conv1_activation_holder_quantizer.quantization_bypass == True + assert conv1_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16 + + # check relu + assert hasattr(quantized_model, 'relu_activation_holder_quantizer') + relu_activation_holder_quantizer = quantized_model.relu_activation_holder_quantizer + assert isinstance(relu_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert 
relu_activation_holder_quantizer.activation_holder_quantizer.num_bits == 8 + + # check conv2 + assert not hasattr(quantized_model, 'conv2_activation_holder_quantizer') + + # check sigmoid + assert hasattr(quantized_model, 'sigmoid_activation_holder_quantizer') + sigmoid_activation_holder_quantizer = quantized_model.sigmoid_activation_holder_quantizer + assert isinstance(sigmoid_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert sigmoid_activation_holder_quantizer.activation_holder_quantizer.num_bits == 8 + + # check fc + assert hasattr(quantized_model, 'fc_activation_holder_quantizer') + fc_activation_holder_quantizer = quantized_model.fc_activation_holder_quantizer + assert isinstance(fc_activation_holder_quantizer, PytorchFLNActivationQuantizationHolder) + assert fc_activation_holder_quantizer.quantization_bypass == True + assert fc_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16 + + # check hswish + assert hasattr(quantized_model, 'hswish_activation_holder_quantizer') + hswish_activation_holder_quantizer = quantized_model.hswish_activation_holder_quantizer + assert isinstance(hswish_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert hswish_activation_holder_quantizer.activation_holder_quantizer.num_bits == 8 \ No newline at end of file diff --git a/tests_pytest/pytorch_tests/integration_tests/core/fusion/test_fusing_info_generator_torch.py b/tests_pytest/pytorch_tests/integration_tests/core/fusion/test_fusing_info_generator_torch.py index 798ef7206..9d0d60700 100644 --- a/tests_pytest/pytorch_tests/integration_tests/core/fusion/test_fusing_info_generator_torch.py +++ b/tests_pytest/pytorch_tests/integration_tests/core/fusion/test_fusing_info_generator_torch.py @@ -26,7 +26,7 @@ import torch.nn as nn import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema -from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG +from 
model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG class BaseTestFusingInfoGeneratorPytorch(BaseFusingInfoGeneratorTest, TorchFwMixin): @@ -53,7 +53,7 @@ class TestFusingConvRelu(BaseTestFusingInfoGeneratorPytorch): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -104,7 +104,7 @@ class TestFusingAnyAct(BaseTestFusingInfoGeneratorPytorch): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -175,7 +175,7 @@ class TestFusingConvReLUOnly(BaseTestFusingInfoGeneratorPytorch): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -254,12 +254,12 @@ class TestFusingComplexPatterns(BaseTestFusingInfoGeneratorPytorch): ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSED_OP_QUANT_CONFIG: None}, - {FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSE_OP_QUANT_CONFIG: None}, + {FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSE_OP_QUANT_CONFIG: None}, + 
{FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -362,7 +362,7 @@ class TestFusingConvSwishWithMultiSuccessors(BaseTestFusingInfoGeneratorPytorch) ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( @@ -415,7 +415,7 @@ class TestFusingConvReluWithMultiPredecessors(BaseTestFusingInfoGeneratorPytorch ] expected_fusing_patterns = [ - {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None} + {FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None} ] expected_fi = FusingInfo( diff --git a/tests_pytest/pytorch_tests/unit_tests/core/back2framework/test_pytorch_model_builder_fln.py b/tests_pytest/pytorch_tests/unit_tests/core/back2framework/test_pytorch_model_builder_fln.py new file mode 100644 index 000000000..e19f8b75d --- /dev/null +++ b/tests_pytest/pytorch_tests/unit_tests/core/back2framework/test_pytorch_model_builder_fln.py @@ -0,0 +1,183 @@ +# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from unittest.mock import Mock +from typing import List +import torch +from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_activation_quantizer_holder, fully_quantized_wrapper +from mct_quantizers import QuantizationMethod, PytorchActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder + +from model_compression_toolkit.core.common import Graph, BaseNode +from model_compression_toolkit.core.common.graph.edge import Edge +from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import CandidateNodeQuantizationConfig, NodeQuantizationConfig +from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeActivationQuantizationConfig, NodeWeightsQuantizationConfig, ActivationQuantizationMode +from model_compression_toolkit.target_platform_capabilities import AttributeQuantizationConfig, OpQuantizationConfig, Signedness +from model_compression_toolkit.core import QuantizationConfig +from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping +from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder +from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import FrameworkQuantizationCapabilities +from model_compression_toolkit.core.pytorch.default_framework_info import PyTorchInfo +from model_compression_toolkit.core.common.framework_info import set_fw_info +from tests_pytest._test_util.graph_builder_utils import DummyLayer +from tests_pytest._test_util.tpc_util import minimal_tpc + + +def build_node(name='node', framework_attr={}, layer_class=DummyLayer, + qcs: List[CandidateNodeQuantizationConfig] = None): + node = BaseNode(name=name, + framework_attr=framework_attr, + input_shape=(4, 5, 6), + output_shape=(4, 5, 6), + 
weights={}, + layer_class=layer_class, + reuse=False) + if qcs: + assert isinstance(qcs, list) + node.quantization_cfg = NodeQuantizationConfig(base_quantization_cfg=qcs[0], candidates_quantization_cfg=qcs) + return node + +def build_qc(q_mode=ActivationQuantizationMode.QUANT): + op_cfg = OpQuantizationConfig( + default_weight_attr_config=AttributeQuantizationConfig(), + attr_weights_configs_mapping={}, + activation_quantization_method=QuantizationMethod.POWER_OF_TWO, + activation_n_bits=8, + enable_activation_quantization=True, + quantization_preserving=False, + supported_input_activation_n_bits=8, + signedness=Signedness.AUTO + ) + a_qcfg = NodeActivationQuantizationConfig(op_cfg=op_cfg) + a_qcfg.quant_mode = q_mode + w_qcfg = NodeWeightsQuantizationConfig(op_cfg=op_cfg, + weights_channels_axis=ChannelAxisMapping(0, 1), + node_attrs_list=['weight', 'bias']) + qc = CandidateNodeQuantizationConfig(activation_quantization_cfg=a_qcfg, + weights_quantization_cfg=w_qcfg) + return qc + +def get_test_graph(): + + set_fw_info(PyTorchInfo) + + conv1 = build_node('conv1', framework_attr={'in_channels':3, 'out_channels':3, 'kernel_size':3}, + layer_class=torch.nn.Conv2d, qcs=[build_qc(q_mode=ActivationQuantizationMode.FLN_QUANT)]) + relu = build_node('relu', layer_class=torch.nn.ReLU, qcs=[build_qc()]) + conv2 = build_node('conv2', framework_attr={'in_channels':3, 'out_channels':3, 'kernel_size':3}, + layer_class=torch.nn.Conv2d, qcs=[build_qc(q_mode=ActivationQuantizationMode.FLN_NO_QUANT)]) + sigmoid = build_node('sigmoid', layer_class=torch.nn.Sigmoid, qcs=[build_qc()]) + flatten = build_node('flatten', layer_class=torch.nn.Flatten, + qcs=[build_qc(q_mode=ActivationQuantizationMode.PRESERVE_QUANT)]) + fc = build_node('fc', framework_attr={'in_features':48, 'out_features':10}, + layer_class=torch.nn.Linear, qcs=[build_qc(q_mode=ActivationQuantizationMode.FLN_QUANT)]) + hswish = build_node('hswish', layer_class=torch.nn.Hardswish, qcs=[build_qc()]) + + graph = Graph('g', 
input_nodes=[conv1], + nodes=[relu, conv2, sigmoid, flatten, fc], + output_nodes=[hswish], + edge_list=[Edge(conv1, relu, 0, 0), + Edge(relu, conv2, 0, 0), + Edge(conv2, sigmoid, 0, 0), + Edge(sigmoid, flatten, 0, 0), + Edge(flatten, fc, 0, 0), + Edge(fc, hswish, 0, 0), + ] + ) + fqc = FrameworkQuantizationCapabilities(tpc=minimal_tpc(), name="test") + graph.set_fqc(fqc) + + return graph + +def get_inferable_quantizers_mock(node): + + if node.name == 'conv2' or node.name == 'relu': + activation_quantizers = Mock() + activation_quantizers.num_bits = 8 + activation_quantizers.signed = False + activation_quantizers.threshold_np = 8.0 + + elif node.name == 'conv1' or node.name == 'fc': + activation_quantizers = Mock() + activation_quantizers.num_bits = 16 + activation_quantizers.signed = True + activation_quantizers.threshold_np = 16.0 + + elif node.name == 'sigmoid' or node.name == 'hswish': + activation_quantizers = Mock() + activation_quantizers.num_bits = 4 + activation_quantizers.signed = False + activation_quantizers.threshold_np = 4.0 + else: + return {}, [] + + return {}, [activation_quantizers] + + +class TestPyTorchModelBuilder(): + + def test_pytorch_model(self, fw_impl_mock): + graph = get_test_graph() + fw_impl_mock.get_inferable_quantizers.side_effect = lambda node: get_inferable_quantizers_mock(node) + exportable_model, _ = PyTorchModelBuilder(graph=graph, + wrapper=lambda n, m: + fully_quantized_wrapper(n, m, + fw_impl=fw_impl_mock), + get_activation_quantizer_holder_fn=lambda n, holder_type, **kwargs: + get_activation_quantizer_holder(n, holder_type, + fw_impl=fw_impl_mock, **kwargs)).build_model() + + # check conv1 + assert hasattr(exportable_model, 'conv1_activation_holder_quantizer') + conv1_activation_holder_quantizer = exportable_model.conv1_activation_holder_quantizer + assert isinstance(conv1_activation_holder_quantizer, PytorchFLNActivationQuantizationHolder) + assert conv1_activation_holder_quantizer.quantization_bypass == True + assert 
conv1_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16 + assert conv1_activation_holder_quantizer.activation_holder_quantizer.signed == True + assert conv1_activation_holder_quantizer.activation_holder_quantizer.threshold_np == 16.0 + + # check relu + assert hasattr(exportable_model, 'relu_activation_holder_quantizer') + relu_activation_holder_quantizer = exportable_model.relu_activation_holder_quantizer + assert isinstance(relu_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert relu_activation_holder_quantizer.activation_holder_quantizer.num_bits == 8 + assert relu_activation_holder_quantizer.activation_holder_quantizer.signed == False + assert relu_activation_holder_quantizer.activation_holder_quantizer.threshold_np == 8.0 + + # check conv2 (FLN_NO_QUANT) + assert not hasattr(exportable_model, 'conv2_activation_holder_quantizer') + + # check sigmoid + assert hasattr(exportable_model, 'sigmoid_activation_holder_quantizer') + sigmoid_activation_holder_quantizer = exportable_model.sigmoid_activation_holder_quantizer + assert isinstance(sigmoid_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert sigmoid_activation_holder_quantizer.activation_holder_quantizer.num_bits == 4 + assert sigmoid_activation_holder_quantizer.activation_holder_quantizer.signed == False + assert sigmoid_activation_holder_quantizer.activation_holder_quantizer.threshold_np == 4.0 + + # check fc + assert hasattr(exportable_model, 'fc_activation_holder_quantizer') + fc_activation_holder_quantizer = exportable_model.fc_activation_holder_quantizer + assert isinstance(fc_activation_holder_quantizer, PytorchFLNActivationQuantizationHolder) + assert fc_activation_holder_quantizer.quantization_bypass == True + assert fc_activation_holder_quantizer.activation_holder_quantizer.num_bits == 16 + assert fc_activation_holder_quantizer.activation_holder_quantizer.signed == True + assert 
fc_activation_holder_quantizer.activation_holder_quantizer.threshold_np == 16.0 + + # check hswish + assert hasattr(exportable_model, 'hswish_activation_holder_quantizer') + hswish_activation_holder_quantizer = exportable_model.hswish_activation_holder_quantizer + assert isinstance(hswish_activation_holder_quantizer, PytorchActivationQuantizationHolder) + assert hswish_activation_holder_quantizer.activation_holder_quantizer.num_bits == 4 + assert hswish_activation_holder_quantizer.activation_holder_quantizer.signed == False + assert hswish_activation_holder_quantizer.activation_holder_quantizer.threshold_np == 4.0 diff --git a/tests_pytest/pytorch_tests/unit_tests/exporter/model_wrapper/builder/test_get_activation_quantizer_holder_fln.py b/tests_pytest/pytorch_tests/unit_tests/exporter/model_wrapper/builder/test_get_activation_quantizer_holder_fln.py new file mode 100644 index 000000000..b3a070db6 --- /dev/null +++ b/tests_pytest/pytorch_tests/unit_tests/exporter/model_wrapper/builder/test_get_activation_quantizer_holder_fln.py @@ -0,0 +1,50 @@ +# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ==============================================================================
from unittest.mock import Mock

from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_activation_quantizer_holder
from mct_quantizers import PytorchActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder


def _quantizer_mock(num_bits, signed, threshold):
    """Build a mock inferable activation quantizer with the given attributes."""
    quantizer = Mock()
    quantizer.num_bits = num_bits
    quantizer.signed = signed
    quantizer.threshold_np = threshold
    return quantizer


def test_get_activation_quantizer_holder(fw_impl_mock):
    """A plain holder type wraps the node's inferable activation quantizer."""
    fw_impl_mock.get_inferable_quantizers.return_value = (None, [_quantizer_mock(8, False, 4.0)])

    activation_quantization_holder = get_activation_quantizer_holder(node=Mock(),
                                                                     holder_type=PytorchActivationQuantizationHolder,
                                                                     fw_impl=fw_impl_mock)

    assert isinstance(activation_quantization_holder, PytorchActivationQuantizationHolder)
    assert activation_quantization_holder.activation_holder_quantizer.num_bits == 8
    assert activation_quantization_holder.activation_holder_quantizer.signed is False
    assert activation_quantization_holder.activation_holder_quantizer.threshold_np == 4.0


def test_get_fln_activation_quantizer_holder(fw_impl_mock):
    """An FLN holder type additionally carries the quantization_bypass flag."""
    fw_impl_mock.get_inferable_quantizers.return_value = (None, [_quantizer_mock(16, True, 16.0)])

    # Pass the bypass flag as a plain keyword argument (was `**{'quantization_bypass': True}`).
    activation_quantization_holder = get_activation_quantizer_holder(node=Mock(),
                                                                     holder_type=PytorchFLNActivationQuantizationHolder,
                                                                     fw_impl=fw_impl_mock,
                                                                     quantization_bypass=True)

    assert isinstance(activation_quantization_holder, PytorchFLNActivationQuantizationHolder)
    assert activation_quantization_holder.quantization_bypass is True
    assert activation_quantization_holder.activation_holder_quantizer.num_bits == 16
    assert activation_quantization_holder.activation_holder_quantizer.signed is True
    assert activation_quantization_holder.activation_holder_quantizer.threshold_np == 16.0
# ==============================================================================
from unittest.mock import Mock

from model_compression_toolkit.exporter.model_wrapper.fw_agnostic.get_inferable_quantizers import get_inferable_quantizers
from mct_quantizers.pytorch.quantizers.activation_inferable_quantizers.activation_pot_inferable_quantizer import ActivationPOTInferableQuantizer


def _node_mock(quant_enabled, fln_quant):
    """Build a node mock exposing the two activation-quantization predicates
    that get_inferable_quantizers() consults."""
    node = Mock()
    node.is_activation_quantization_enabled.return_value = quant_enabled
    node.is_fln_quantization.return_value = fln_quant
    node.output_shape = 1
    return node


def test_get_inferable_quantizers():
    """Activation quantizers are produced for QUANT and FLN_QUANT nodes only."""
    # QUANT node: one activation quantizer is returned.
    get_act_quantizer = Mock(return_value=ActivationPOTInferableQuantizer(num_bits=8, signed=True, threshold=[8.0]))
    _, activation_quantizers = get_inferable_quantizers(node=_node_mock(True, False),
                                                        get_weights_quantizer_for_node=None,
                                                        get_activations_quantizer_for_node=get_act_quantizer)
    assert len(activation_quantizers) == 1
    assert isinstance(activation_quantizers[0], ActivationPOTInferableQuantizer)
    assert activation_quantizers[0].num_bits == 8
    assert activation_quantizers[0].signed is True
    assert activation_quantizers[0].threshold_np == 8.0

    # FLN_QUANT node: one activation quantizer is returned as well.
    get_act_quantizer = Mock(return_value=ActivationPOTInferableQuantizer(num_bits=16, signed=False, threshold=[4.0]))
    _, activation_quantizers = get_inferable_quantizers(node=_node_mock(False, True),
                                                        get_weights_quantizer_for_node=None,
                                                        get_activations_quantizer_for_node=get_act_quantizer)
    assert len(activation_quantizers) == 1
    assert isinstance(activation_quantizers[0], ActivationPOTInferableQuantizer)
    assert activation_quantizers[0].num_bits == 16
    assert activation_quantizers[0].signed is False
    assert activation_quantizers[0].threshold_np == 4.0

    # NO_QUANT / FLN_NO_QUANT node: no activation quantizers at all.
    get_act_quantizer = Mock(return_value=ActivationPOTInferableQuantizer(num_bits=8, signed=True, threshold=[16.0]))
    _, activation_quantizers = get_inferable_quantizers(node=_node_mock(False, False),
                                                        get_weights_quantizer_for_node=None,
                                                        get_activations_quantizer_for_node=get_act_quantizer)
    assert len(activation_quantizers) == 0