Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion model_compression_toolkit/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,4 +142,4 @@

# Fusing Patterns constants
FUSED_LAYER_PATTERN = 'fused_layer_pattern'
FUSED_OP_QUANT_CONFIG = 'fused_op_quantization_config'
FUSE_OP_QUANT_CONFIG = 'fuse_op_quantization_config'
4 changes: 2 additions & 2 deletions model_compression_toolkit/core/common/fusion/fusing_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from model_compression_toolkit.target_platform_capabilities import LayerFilterParams
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import OpQuantizationConfig
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG
from dataclasses import dataclass, field

from typing import Optional, List, Dict, Any, Tuple
Expand Down Expand Up @@ -131,7 +131,7 @@ def set_fused_op_quantization_config(self, op_id: str, nodes: Tuple['BaseNode'])
"""
fusing_pattern = next((fp for fp in self.fusing_patterns if is_valid_fusion([fp.get(FUSED_LAYER_PATTERN)], nodes)), None)
if fusing_pattern is not None:
self.fused_op_id_to_quant_config[op_id] = fusing_pattern.get(FUSED_OP_QUANT_CONFIG)
self.fused_op_id_to_quant_config[op_id] = fusing_pattern.get(FUSE_OP_QUANT_CONFIG)

def remove_fused_operation(self, op_id: str) -> None:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder
from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
from mct_quantizers.common.constants import ACTIVATION_HOLDER_QUANTIZER
from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder
from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder, PytorchPreservingActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder


def _build_input_tensors_list(node: BaseNode,
Expand Down Expand Up @@ -347,6 +347,12 @@ def _add_modules(self, reused_nodes_only=False):
holder_type=PytorchPreservingActivationQuantizationHolder,
**holder_kwargs)

elif node.is_fln_quantization():
holder_kwargs = {'quantization_bypass': True}
activation_quantizer_holder = self.get_activation_quantizer_holder(node,
holder_type=PytorchFLNActivationQuantizationHolder,
**holder_kwargs)

if activation_quantizer_holder is not None:
activation_quantizer_holder_name = node.name + '_' + ACTIVATION_HOLDER_QUANTIZER
self.add_module(activation_quantizer_holder_name, activation_quantizer_holder)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def get_inferable_quantizers(node: BaseNode,
weight_quantizer = get_weights_quantizer_for_node(node, attr)
weight_quantizers[attr] = weight_quantizer

if node.is_activation_quantization_enabled():
if node.is_activation_quantization_enabled() or node.is_fln_quantization():
num_of_outputs = len(node.output_shape) if isinstance(node.output_shape, list) else 1
activation_quantizers = [get_activations_quantizer_for_node(node)] * num_of_outputs

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
OpQuantizationConfig, QuantizationConfigOptions
from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.current_tpc import _current_tpc

from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG


class FrameworkQuantizationCapabilities(ImmutableClass):
Expand Down Expand Up @@ -113,8 +113,8 @@ def get_fusing_patterns(self) -> List[Dict[List[Any], OpQuantizationConfig]]:
ops = [self.get_layers_by_opset(x) for x in p.operator_groups]
res.extend(itertools.product(*ops))

fused_op_quant_config = getattr(p, FUSED_OP_QUANT_CONFIG, None)
patterns.extend({FUSED_LAYER_PATTERN: list(x), FUSED_OP_QUANT_CONFIG: fused_op_quant_config} for x in res)
fuse_op_quant_config = getattr(p, FUSE_OP_QUANT_CONFIG, None)
patterns.extend({FUSED_LAYER_PATTERN: list(x), FUSE_OP_QUANT_CONFIG: fuse_op_quant_config} for x in res)

return patterns

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,21 @@ def compare(self, inputs_list, quantized_model, qc, tpc):
self.unit_test.assertTrue(False, f'fail TFLite convertion with the following error: {error_msg}')

def run_network(self, inputs_list, qc, tpc):
from model_compression_toolkit.gptq.common.gptq_config import GradualActivationQuantizationConfig
def representative_data_gen():
for _ in range(self.num_calibration_iter):
yield inputs_list

core_config = mct.core.CoreConfig(quantization_config=qc)
if self.gptq:
arc = mct.gptq.GradientPTQConfig(n_epochs=2, optimizer=tf.keras.optimizers.Adam(
learning_rate=0.0001), optimizer_rest=tf.keras.optimizers.Adam(
learning_rate=0.0001), loss=multiple_tensors_mse_loss)
arc = mct.gptq.GradientPTQConfig(n_epochs=2,
optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
optimizer_rest=tf.keras.optimizers.Adam(learning_rate=0.0001),
loss=multiple_tensors_mse_loss,
train_bias=True,
hessian_weights_config=None,
gradual_activation_quantization_config= GradualActivationQuantizationConfig(),
regularization_factor=1)

ptq_model, quantization_info = mct.gptq.keras_gradient_post_training_quantization(
self.model_float,
Expand Down
2 changes: 1 addition & 1 deletion tests/keras_tests/non_parallel_tests/test_keras_tpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from keras import Input

import model_compression_toolkit as mct
from model_compression_toolkit.constants import TENSORFLOW, FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import TENSORFLOW, FUSED_LAYER_PATTERN
from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL, IMX500_TP_MODEL, \
QNNPACK_TP_MODEL, TFLITE_TP_MODEL, KERNEL_ATTR, BIAS_ATTR, KERAS_KERNEL, BIAS, WEIGHTS_N_BITS
from model_compression_toolkit.core.keras.keras_implementation import KerasImplementation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pytest
from model_compression_toolkit.core.common.graph.base_graph import OutTensor

from model_compression_toolkit.constants import FLOAT_BITWIDTH, FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import FLOAT_BITWIDTH, FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG
from model_compression_toolkit.core import ResourceUtilization
from model_compression_toolkit.core.common import Graph
from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfo
Expand Down Expand Up @@ -574,7 +574,7 @@ def test_compute_cuts_random_fusion_valid_utilization(self, seed, disable_quanti
if i + fuse_len <= num_nodes:
fused = tuple(nodes[j] for j in range(i, i + fuse_len))
fused_name = f"FusedNode_{'_'.join(n.name for n in fused)}"
fused_pattern = {FUSED_LAYER_PATTERN: [n.layer_class for n in fused], FUSED_OP_QUANT_CONFIG: None}
fused_pattern = {FUSED_LAYER_PATTERN: [n.layer_class for n in fused], FUSE_OP_QUANT_CONFIG: None}
fused_patterns.append(fused_pattern)
fused_data[fused_name] = fused
i += fuse_len
Expand Down
14 changes: 7 additions & 7 deletions tests_pytest/common_tests/unit_tests/core/test_fusion_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfoGenerator, FUSED_OP_ID_PREFIX, FusingInfo
from model_compression_toolkit.target_platform_capabilities import FrameworkQuantizationCapabilities
from model_compression_toolkit.core.common import BaseNode
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG
from mct_quantizers import QuantizationMethod

from tests.common_tests.helpers.generate_test_tpc import generate_test_attr_configs, generate_test_op_qc
Expand Down Expand Up @@ -51,8 +51,8 @@ def fusing_patterns():
"""
- Returns predefined fusing patterns: Conv2D + ReLU and Linear + Softmax.
"""
return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSED_OP_QUANT_CONFIG: None}]
return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSE_OP_QUANT_CONFIG: None}]


@pytest.fixture
Expand Down Expand Up @@ -249,10 +249,10 @@ def fusing_patterns_with_qconfig():
"""
- Returns predefined fusing patterns: Conv2D + ReLU and Conv2D + Tanh, Linear + Softmax.
"""
return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSED_OP_QUANT_CONFIG: TEST_QC_1},
{FUSED_LAYER_PATTERN: ["Conv2d", "Tanh"], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: ["Conv2d", "BatchNorm2d", "ReLU6"], FUSED_OP_QUANT_CONFIG: TEST_QC_2},
{FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSED_OP_QUANT_CONFIG: TEST_QC_3 }]
return [{FUSED_LAYER_PATTERN: ["Conv2d", "ReLU"], FUSE_OP_QUANT_CONFIG: TEST_QC_1},
{FUSED_LAYER_PATTERN: ["Conv2d", "Tanh"], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: ["Conv2d", "BatchNorm2d", "ReLU6"], FUSE_OP_QUANT_CONFIG: TEST_QC_2},
{FUSED_LAYER_PATTERN: ["Linear", "Softmax"], FUSE_OP_QUANT_CONFIG: TEST_QC_3 }]

@pytest.fixture
def fusing_info_generator_with_qconfig(fusing_patterns_with_qconfig):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from tests_pytest._test_util.graph_builder_utils import build_node
from tests_pytest.keras_tests.keras_test_util.keras_test_mixin import KerasFwMixin
import model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema as schema
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSED_OP_QUANT_CONFIG
from model_compression_toolkit.constants import FUSED_LAYER_PATTERN, FUSE_OP_QUANT_CONFIG

from tensorflow.keras import backend as K

Expand Down Expand Up @@ -53,7 +53,7 @@ class TestFusingConvRelu(BaseTestFusingInfoGeneratorKeras):
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down Expand Up @@ -96,7 +96,7 @@ class TestFusingAnyActKeras(BaseTestFusingInfoGeneratorKeras):
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down Expand Up @@ -153,7 +153,7 @@ class TestFusingConvReLUOnlyKeras(BaseTestFusingInfoGeneratorKeras):
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down Expand Up @@ -220,12 +220,12 @@ class TestFusingComplexPatternsKeras(BaseTestFusingInfoGeneratorKeras):
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSED_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[1]], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[2]], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[3]], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[4]], FUSE_OP_QUANT_CONFIG: None},
{FUSED_LAYER_PATTERN: [fusing_patterns[5]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down Expand Up @@ -313,7 +313,7 @@ class TestFusingConvSwishWithMultiSuccessorsKeras(BaseTestFusingInfoGeneratorKer
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down Expand Up @@ -360,7 +360,7 @@ class TestFusingConvReluWithMultiPredecessorsKeras(BaseTestFusingInfoGeneratorKe
]

expected_fusing_patterns = [
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSED_OP_QUANT_CONFIG: None}
{FUSED_LAYER_PATTERN: [fusing_patterns[0]], FUSE_OP_QUANT_CONFIG: None}
]

expected_fi = FusingInfo(
Expand Down
142 changes: 142 additions & 0 deletions tests_pytest/pytorch_tests/e2e_tests/test_fln_quantization_holder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import model_compression_toolkit as mct
import torch
from mct_quantizers import PytorchActivationQuantizationHolder, PytorchFLNActivationQuantizationHolder

from tests_pytest._test_util.tpc_util import configure_mp_activation_opsets
from model_compression_toolkit.target_platform_capabilities.schema.v2 import QuantizationMethod, AttributeQuantizationConfig, \
OpQuantizationConfig, QuantizationConfigOptions, Signedness, OperatorSetNames, TargetPlatformCapabilities, Fusing, OperatorsSet
from tests.common_tests.helpers.generate_test_tpc import generate_test_attr_configs, generate_test_op_qc


def build_tpc():
    """Build a TargetPlatformCapabilities fixture for the FLN-holder e2e test.

    The TPC declares three fusing patterns over CONV/RELU/SIGMOID/FC/HARDSWISH
    opsets: CONV+RELU and FC+HARDSWISH carry an explicit 16-bit fused-op
    quantization config, while CONV+SIGMOID carries none.

    Returns:
        TargetPlatformCapabilities: the configured capabilities object.
    """
    # Base 8-bit power-of-two activation config shared by all opsets.
    base_op_cfg = OpQuantizationConfig(
        default_weight_attr_config=AttributeQuantizationConfig(),
        attr_weights_configs_mapping={},
        activation_quantization_method=QuantizationMethod.POWER_OF_TWO,
        activation_n_bits=8,
        supported_input_activation_n_bits=[8],
        enable_activation_quantization=True,
        enable_weights_quantization=True,
        quantization_preserving=False,
        fixed_scale=None,
        fixed_zero_point=None,
        simd_size=32,
        signedness=Signedness.AUTO
    )

    op_names = [OperatorSetNames.CONV,
                OperatorSetNames.RELU,
                OperatorSetNames.SIGMOID,
                OperatorSetNames.FULLY_CONNECTED,
                OperatorSetNames.HARDSWISH]
    opsets, _ = configure_mp_activation_opsets(opset_names=op_names,
                                               base_op_config=base_op_cfg,
                                               a_nbits=[8])

    default_cfg = QuantizationConfigOptions(quantization_configurations=[base_op_cfg])

    # 16-bit activation config attached to the fused patterns below.
    fused_qc = generate_test_op_qc(**generate_test_attr_configs(), activation_n_bits=16)

    def _fuse(first, second, qc=None):
        # Build a two-operator fusing pattern; only pass the fused-op config
        # when one is given, mirroring a pattern declared without one.
        group = (OperatorsSet(name=first), OperatorsSet(name=second))
        if qc is None:
            return Fusing(operator_groups=group)
        return Fusing(operator_groups=group, fuse_op_quantization_config=qc)

    return TargetPlatformCapabilities(
        default_qco=default_cfg,
        operator_set=opsets,
        fusing_patterns=[
            _fuse(OperatorSetNames.CONV, OperatorSetNames.RELU, fused_qc),
            _fuse(OperatorSetNames.CONV, OperatorSetNames.SIGMOID),
            _fuse(OperatorSetNames.FULLY_CONNECTED, OperatorSetNames.HARDSWISH, fused_qc),
        ]
    )

def representative_data_gen(shape=(3, 8, 8), num_inputs=1, batch_size=2, num_iter=1):
    """Yield `num_iter` calibration batches of random tensors.

    Each yielded item is a list of `num_inputs` references to one random
    tensor of shape (batch_size, *shape) — inputs within a batch alias the
    same tensor object, matching the original fixture behavior.
    """
    for _ in range(num_iter):
        batch = torch.randn(batch_size, *shape)
        yield num_inputs * [batch]

def get_float_model():
    """Return an untrained float model: conv1 -> relu -> conv2 -> sigmoid -> flatten -> fc -> hswish.

    Submodule attribute names are load-bearing: the e2e assertions look up
    holders such as `conv1_activation_holder_quantizer` on the quantized model.
    """
    class _FloatNet(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.conv1 = torch.nn.Conv2d(3, 3, kernel_size=3)
            self.relu = torch.nn.ReLU()
            self.conv2 = torch.nn.Conv2d(3, 3, kernel_size=3)
            self.sigmoid = torch.nn.Sigmoid()
            self.flatten = torch.nn.Flatten()
            # 3 channels * 4 * 4 spatial after two valid 3x3 convs on 8x8 input.
            self.fc = torch.nn.Linear(48, 10)
            self.hswish = torch.nn.Hardswish()

        def forward(self, x):
            for layer in (self.conv1, self.relu, self.conv2, self.sigmoid,
                          self.flatten, self.fc, self.hswish):
                x = layer(x)
            return x

    return _FloatNet()

def test_fln_quantization_holder():
    """E2E: FLN (fuse-level) activation holders are placed on fused-op heads.

    conv1 (CONV+RELU fusion) and fc (FC+HARDSWISH fusion) must get a bypassed
    16-bit FLN holder; relu/sigmoid/hswish get regular 8-bit holders; conv2
    (CONV+SIGMOID fusion with no fused-op config) gets no holder at all.
    """
    quantized_model, _ = mct.ptq.pytorch_post_training_quantization(
        in_module=get_float_model(),
        representative_data_gen=representative_data_gen,
        target_platform_capabilities=build_tpc()
    )

    # conv2 is fused without a fused-op quantization config -> no holder.
    assert not hasattr(quantized_model, 'conv2_activation_holder_quantizer')

    # layer name -> (expected holder class, expected activation bit-width)
    expected_holders = {
        'conv1': (PytorchFLNActivationQuantizationHolder, 16),
        'relu': (PytorchActivationQuantizationHolder, 8),
        'sigmoid': (PytorchActivationQuantizationHolder, 8),
        'fc': (PytorchFLNActivationQuantizationHolder, 16),
        'hswish': (PytorchActivationQuantizationHolder, 8),
    }
    for layer_name, (holder_cls, n_bits) in expected_holders.items():
        attr_name = layer_name + '_activation_holder_quantizer'
        assert hasattr(quantized_model, attr_name)
        holder = getattr(quantized_model, attr_name)
        assert isinstance(holder, holder_cls)
        if holder_cls is PytorchFLNActivationQuantizationHolder:
            # FLN holders collect stats but bypass quantization at inference.
            assert holder.quantization_bypass == True
        assert holder.activation_holder_quantizer.num_bits == n_bits
Loading