Skip to content

Commit 7d0b412

Browse files
irenabirenab
authored and committed
remove weights_second_moment_correction and weights_bias_correction from NodeWeightsQuantizationConfig
1 parent 39c8d63 commit 7d0b412

10 files changed

Lines changed: 22 additions & 58 deletions

File tree

model_compression_toolkit/core/common/quantization/node_quantization_config.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
1919
from model_compression_toolkit.logger import Logger
2020

21-
from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
2221
from model_compression_toolkit.target_platform_capabilities.constants import POSITIONAL_ATTR
2322
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import \
2423
AttributeQuantizationConfig, OpQuantizationConfig
@@ -64,6 +63,7 @@ def set_quant_config_attr(self, config_parameter_name: str, config_parameter_val
6463
if hasattr(self, config_parameter_name):
6564
setattr(self, config_parameter_name, config_parameter_value)
6665
else:
66+
raise AttributeError(config_parameter_name)
6767
Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config and "
6868
f"was not updated!")
6969

@@ -272,14 +272,11 @@ def __init__(self,
272272

273273
self.attributes_config_mapping[attr] = WeightsAttrQuantizationConfig(weights_attr_cfg=attr_cfg,
274274
weights_channels_axis=weights_channels_axis)
275-
# TODO irena remove along with set_qc. Keeping for eq and hash to work without set_qc being called
275+
# TODO this is set by batch norm reconstruction substitution when folded batch norms are added back, to mark
276+
# the nodes that the correction should be applied to (for some nodes it gets disabled) and BNs removed.
277+
# The actual correction is only computed when it's applied in ptq, so it seems that both substitutions could
278+
# be unified, and no info need to pass between.
276279
self.weights_second_moment_correction = None
277-
self.weights_bias_correction = None
278-
279-
def set_qc(self, qc: QuantizationConfig):
280-
# TODO irena: temporary keep the fields to not break everything at once.
281-
self.weights_second_moment_correction = qc.weights_second_moment_correction
282-
self.weights_bias_correction = qc.weights_bias_correction
283280

284281
def get_attr_config(self, attr_name: 'WeightAttrT') -> WeightsAttrQuantizationConfig:
285282
"""
@@ -435,8 +432,6 @@ def __eq__(self, other: Any) -> bool:
435432
return False # pragma: no cover
436433

437434
return self.simd_size == other.simd_size and \
438-
self.weights_second_moment_correction == other.weights_second_moment_correction and \
439-
self.weights_bias_correction == other.weights_bias_correction and \
440435
self.attributes_config_mapping.keys() == other.attributes_config_mapping.keys() and \
441436
all([self.attributes_config_mapping[k] == other.attributes_config_mapping[k]
442437
for k in self.attributes_config_mapping.keys()]) and \
@@ -446,7 +441,5 @@ def __eq__(self, other: Any) -> bool:
446441

447442
def __hash__(self):
448443
return hash((self.simd_size,
449-
self.weights_second_moment_correction,
450-
self.weights_bias_correction,
451444
frozenset(self.attributes_config_mapping),
452445
frozenset(self.pos_attributes_config_mapping)))

model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,24 +14,20 @@
1414
# ==============================================================================
1515
import copy
1616

17-
from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
18-
from model_compression_toolkit.core import CoreConfig
1917
from model_compression_toolkit.core.common import Graph, BaseNode
2018
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
2119
from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig
2220
from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import AttributeQuantizationConfig
2321

2422

2523
def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
26-
core_config: CoreConfig,
2724
fw_impl: FrameworkImplementation) -> Graph:
2825
"""
2926
Get a graph, where each node has a final weights quantization configuration (with a bias
3027
correction term in it), and apply the bias correction for each node in the graph.
3128
3229
Args:
3330
graph_to_apply_bias_correction: Graph to apply bias correction to.
34-
core_config: CoreConfig containing parameters of how the model should be quantized.
3531
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
3632
3733
Returns:
@@ -41,19 +37,16 @@ def apply_bias_correction_to_graph(graph_to_apply_bias_correction: Graph,
4137
graph = copy.deepcopy(graph_to_apply_bias_correction)
4238
for n in graph.nodes:
4339
# bias correction is only relevant for nodes with kernel op
44-
if core_config.quantization_config.weights_bias_correction and n.kernel_attr is not None and \
45-
n.is_weights_quantization_enabled(n.kernel_attr) and \
40+
if n.kernel_attr is not None and n.is_weights_quantization_enabled(n.kernel_attr) and \
4641
not n.final_weights_quantization_cfg.weights_second_moment_correction:
4742
# If a kernel was quantized and weights bias correction is enabled in n.quantization_cfg,
4843
# a bias correction term was calculated during model preparation, and is used now in the node's bias term.
49-
if n.final_weights_quantization_cfg.weights_bias_correction:
50-
_apply_bias_correction_to_node(n, fw_impl, core_config.quantization_config)
44+
_apply_bias_correction_to_node(n, fw_impl)
5145
return graph
5246

5347

5448
def _apply_bias_correction_to_node(node: BaseNode,
55-
fw_impl: FrameworkImplementation,
56-
qc: QuantizationConfig):
49+
fw_impl: FrameworkImplementation):
5750
"""
5851
Set new bias to node using the bias-correction term that is stored in the
5952
final weights quantization configuration.

model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ def _compute_bias_correction_per_candidate_qc(node: BaseNode,
7474
"""
7575

7676
for candidate_qc in node.candidates_quantization_cfg:
77-
if candidate_qc.weights_quantization_cfg.weights_bias_correction and not \
78-
candidate_qc.weights_quantization_cfg.weights_second_moment_correction:
77+
if not candidate_qc.weights_quantization_cfg.weights_second_moment_correction:
7978

8079
quantized_kernel, io_channels_axes = get_quantized_weights_attr_by_qc(kernel_attr,
8180
node,

model_compression_toolkit/core/common/statistics_correction/statistics_correction.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,9 @@ def statistics_correction_runner(transformed_graph: Graph,
5656
########################################################
5757
# Compute bias correction to nodes' config candidates
5858
########################################################
59-
tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
60-
fw_impl)
59+
if core_config.quantization_config.weights_bias_correction:
60+
tg_with_bias = compute_bias_correction_of_graph(tg_with_bias,
61+
fw_impl)
6162

6263
if tb_w is not None:
6364
tb_w.add_graph(tg_with_bias, 'statistics_computation')
@@ -96,7 +97,6 @@ def apply_statistics_correction(transformed_graph: Graph,
9697
#############################################
9798
if core_config.quantization_config.weights_bias_correction:
9899
transformed_graph = apply_bias_correction_to_graph(transformed_graph,
99-
core_config,
100100
fw_impl=fw_impl)
101101
if tb_w is not None:
102102
tb_w.add_graph(transformed_graph, 'after_statistics_correction')

model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import numpy as np
2121

2222
from model_compression_toolkit.core.common import Graph
23-
from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
2423
from model_compression_toolkit.core import common
2524
from model_compression_toolkit.core.common.quantization.node_quantization_config import WeightsAttrQuantizationConfig, \
2625
ActivationQuantizationMode
@@ -84,14 +83,10 @@ def substitute(self,
8483
# If the linear operator is part of a reused group (it is the "base" node, or a reused node),
8584
# we should skip the substitution.
8685
if source_node.is_reused():
87-
for qc in source_node.candidates_quantization_cfg:
88-
qc.weights_quantization_cfg.weights_second_moment_correction = False
8986
return graph
9087

9188
# We apply only on nodes with folded BatchNormalization.
9289
if source_node.prior_info.std_output is None or source_node.prior_info.mean_output is None:
93-
for qc in source_node.candidates_quantization_cfg:
94-
qc.weights_quantization_cfg.weights_second_moment_correction = False
9590
return graph
9691

9792
# This feature disabled for models with weights quantization method of Power of 2
@@ -103,10 +98,13 @@ def substitute(self,
10398
== QuantizationMethod.POWER_OF_TWO):
10499
Logger.warning("Second moment statistics correction feature disabled for models with weights "
105100
"quantization method of Power of 2")
106-
for qc_inner in source_node.candidates_quantization_cfg:
107-
qc_inner.weights_quantization_cfg.weights_second_moment_correction = False
108101
return graph
109102

103+
# turn on second moment correction flag
104+
def set_second_moment_correction(qc):
105+
qc.weights_quantization_cfg.weights_second_moment_correction = True
106+
source_node.quantization_cfg.update_all(set_second_moment_correction)
107+
110108
eps = self.epsilon_val
111109

112110
original_gamma = source_node.prior_info.std_output

model_compression_toolkit/core/graph_prep_runner.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,16 +153,10 @@ def get_finalized_graph(initial_graph: Graph,
153153
if bit_width_config:
154154
set_manual_bitwidth_config(graph, bit_width_config)
155155

156-
# TODO irena: load_fqc_configuration only loads config from tpc. Previously quant_config was read as well.
157-
# As a first stage we keep the attributes in internal configs and fill them manually from quant_config
158-
# not to break all the code at once. Eventually we need to handle quant_config directly, without injecting into candidates.
159-
# TODO 2: Also we adjust candidates for single precision, which we shouldn't do here.
160-
def update(qc):
161-
qc.weights_quantization_cfg.set_qc(quant_config)
156+
# TODO irena: remove after base config is used
162157
for n in transformed_graph.nodes:
163158
if not mixed_precision_enable:
164159
n.quantization_cfg.candidates_quantization_cfg = [n.quantization_cfg.base_quantization_cfg]
165-
n.quantization_cfg.update_all(update)
166160

167161
######################################
168162
# Channel equalization

tests/keras_tests/feature_networks_tests/feature_networks/network_editor/edit_qc_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ class ChangeCandidatesWeightsQuantConfigAttrTest(BaseChangeQuantConfigAttrTest):
224224

225225
def __init__(self, unit_test):
226226
edit_filter = NodeTypeFilter(layers.Conv2D)
227-
action = ChangeCandidatesWeightsQuantConfigAttr(weights_bias_correction=False)
227+
action = ChangeCandidatesWeightsQuantConfigAttr(weights_second_moment_correction=True)
228228
prepare_graph_func = prepare_graph_for_first_network_editor
229229
super().__init__(unit_test, edit_filter=edit_filter, action=action, prepare_graph_func=prepare_graph_func)
230230

@@ -242,7 +242,7 @@ class ChangeFinalsWeightsQuantConfigAttrTest(BaseChangeQuantConfigAttrTest):
242242

243243
def __init__(self, unit_test):
244244
edit_filter = NodeTypeFilter(layers.Conv2D)
245-
action = ChangeFinalWeightsQuantConfigAttr(weights_bias_correction=False)
245+
action = ChangeFinalWeightsQuantConfigAttr(weights_second_moment_correction=True)
246246
prepare_graph_func = prepare_graph_for_second_network_editor
247247
super().__init__(unit_test, edit_filter=edit_filter, action=action, prepare_graph_func=prepare_graph_func)
248248

tests/keras_tests/feature_networks_tests/test_features_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,8 @@ def test_depthwise_conv2d_replacement(self):
182182
DwConv2dReplacementTest(self).run_test()
183183

184184
def test_change_qc_attr(self):
185-
ChangeFinalWeightQCAttrTest(self).run_test()
185+
# there are no fields that can be changed in final cfg and have any effect (unless the whole attr cfgs mapping is overridden)
186+
# ChangeFinalWeightQCAttrTest(self).run_test()
186187
ChangeFinalActivationQCAttrTest(self).run_test()
187188

188189
def test_edit_candidate_qc(self):

tests/keras_tests/function_tests/test_node_quantization_configurations.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,6 @@ def test_weights_set_quant_config_attribute(self):
5353
node_attrs_list=[KERNEL, 0])
5454
og_nwc = copy.deepcopy(nwc)
5555

56-
# Updating a config parameter, not weights attribute parameter (no attr_name passed)
57-
# TODO irena: weights_bias_correction should be removed
58-
# self.assertTrue(nwc.weights_bias_correction)
59-
nwc.set_quant_config_attr("weights_bias_correction", False)
60-
self.assertFalse(nwc.weights_bias_correction)
61-
self.assertFalse(nwc == og_nwc)
62-
63-
nwc = copy.deepcopy(og_nwc)
64-
6556
# Updating an attribute parameter
6657
self.assertTrue(nwc.get_attr_config(KERNEL).weights_n_bits, 8)
6758
nwc.set_quant_config_attr("weights_n_bits", 4, attr_name=KERNEL)

tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,13 +72,8 @@ def dummy_representative_dataset():
7272
graph = load_fqc_configuration(graph=graph, fqc=fqc)
7373

7474
for node in graph.nodes:
75-
# TODO irena remove set_qc:
76-
for c in node.quantization_cfg.candidates_quantization_cfg:
77-
c.weights_quantization_cfg.set_qc(core_config.quantization_config)
78-
7975
node.prior_info = keras_impl.get_node_prior_info(node=node,
8076
graph=graph)
81-
8277
mi = ModelCollector(graph,
8378
fw_impl=keras_impl,
8479
qc=core_config.quantization_config)

0 commit comments

Comments
 (0)