Skip to content

Commit 35ffd84

Browse files
committed
Merge branch 'main' into apply_quant_info_to_fusinginfo
2 parents 629f837 + 09ed05c commit 35ffd84

File tree

33 files changed

+1247
-867
lines changed

33 files changed

+1247
-867
lines changed

model_compression_toolkit/core/common/framework_implementation.py

Lines changed: 6 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -13,31 +13,31 @@
1313
# limitations under the License.
1414
# ==============================================================================
1515
from abc import ABC, abstractmethod
16-
from typing import Callable, Any, List, Tuple, Dict, Generator
16+
from typing import Callable, Any, List, Tuple, Generator, Type
1717

1818
import numpy as np
1919

2020
from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
21-
from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
2221
from model_compression_toolkit.core import common
2322
from model_compression_toolkit.core.common import BaseNode
24-
from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
2523
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
2624
from model_compression_toolkit.core.common.graph.base_graph import Graph
27-
from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianInfoService
28-
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
25+
from model_compression_toolkit.core.common.hessian import HessianScoresRequest
2926
from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
3027
from model_compression_toolkit.core.common.node_prior_info import NodePriorInfo
3128
from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
3229
from model_compression_toolkit.core.common.quantization.quantization_config import QuantizationConfig
33-
from model_compression_toolkit.core.common.user_info import UserInformation
3430

3531

3632
class FrameworkImplementation(ABC):
3733
"""
3834
An abstract class with abstract methods that should be implemented when supporting a new
3935
framework in MCT.
4036
"""
37+
weights_quant_layer_cls: Type
38+
activation_quant_layer_cls: Type
39+
configurable_weights_quantizer_cls: Type
40+
configurable_activation_quantizer_cls: Type
4141

4242
@property
4343
def constants(self):
@@ -327,33 +327,6 @@ def get_substitutions_after_second_moment_correction(self, quant_config: Quantiz
327327
f'framework\'s get_substitutions_after_second_moment_correction '
328328
f'method.') # pragma: no cover
329329

330-
@abstractmethod
331-
def get_sensitivity_evaluator(self,
332-
graph: Graph,
333-
quant_config: MixedPrecisionQuantizationConfig,
334-
representative_data_gen: Callable,
335-
fw_info: FrameworkInfo,
336-
hessian_info_service: HessianInfoService = None,
337-
disable_activation_for_metric: bool = False) -> SensitivityEvaluation:
338-
"""
339-
Creates and returns an object which handles the computation of a sensitivity metric for a mixed-precision
340-
configuration (comparing to the float model).
341-
342-
Args:
343-
graph: Graph to build its float and mixed-precision models.
344-
quant_config: QuantizationConfig of how the model should be quantized.
345-
representative_data_gen: Dataset to use for retrieving images for the models inputs.
346-
fw_info: FrameworkInfo object with information about the specific framework's model.
347-
disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
348-
hessian_info_service: HessianInfoService to fetch information based on Hessian-approximation.
349-
350-
Returns:
351-
A function that computes the metric.
352-
"""
353-
354-
raise NotImplementedError(f'{self.__class__.__name__} has to implement the '
355-
f'framework\'s get_sensitivity_evaluator method.') # pragma: no cover
356-
357330
def get_node_prior_info(self, node: BaseNode,
358331
fw_info: FrameworkInfo,
359332
graph: Graph) -> NodePriorInfo:

model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,23 @@
1414
# ==============================================================================
1515

1616
from dataclasses import dataclass, field
17+
from enum import Enum
1718
from typing import List, Callable, Optional
1819
from model_compression_toolkit.constants import MP_DEFAULT_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
1920
from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
2021

2122

23+
class MpMetricNormalization(Enum):
24+
"""
25+
MAXBIT: normalize sensitivity metrics of layer candidates by max-bitwidth candidate (of that layer).
26+
MINBIT: normalize sensitivity metrics of layer candidates by min-bitwidth candidate (of that layer).
27+
NONE: no normalization.
28+
"""
29+
MAXBIT = 'MAXBIT'
30+
MINBIT = 'MINBIT'
31+
NONE = 'NONE'
32+
33+
2234
@dataclass
2335
class MixedPrecisionQuantizationConfig:
2436
"""
@@ -27,7 +39,6 @@ class MixedPrecisionQuantizationConfig:
2739
Args:
2840
compute_distance_fn (Callable): Function to compute a distance between two tensors. If None, using pre-defined distance methods based on the layer type for each layer.
2941
distance_weighting_method (MpDistanceWeighting): MpDistanceWeighting enum value that provides a function to use when weighting the distances among different layers when computing the sensitivity metric.
30-
custom_metric_fn (Callable): Function to compute a custom metric. As input gets the model_mp and returns a float value for metric. If None, uses interest point metric.
3142
num_of_images (int): Number of images to use to evaluate the sensitivity of a mixed-precision model comparing to the float model.
3243
configuration_overwrite (List[int]): A list of integers that enables overwrite of mixed precision with a predefined one.
3344
num_interest_points_factor (float): A multiplication factor between zero and one (represents percentage) to reduce the number of interest points used to calculate the distance metric.
@@ -36,11 +47,16 @@ class MixedPrecisionQuantizationConfig:
3647
refine_mp_solution (bool): Whether to try to improve the final mixed-precision configuration using a greedy algorithm that searches layers to increase their bit-width, or not.
3748
metric_normalization_threshold (float): A threshold for checking the mixed precision distance metric values, In case of values larger than this threshold, the metric will be scaled to prevent numerical issues.
3849
hessian_batch_size (int): The Hessian computation batch size. used only if using mixed precision with Hessian-based objective.
39-
"""
50+
metric_normalization (MpMetricNormalization): Metric normalization method.
51+
metric_epsilon (float | None): ensure minimal distance between the metric for any non-max-bitwidth candidate
52+
and a max-bitwidth candidate, i.e. metric(non-max-bitwidth) >= metric(max-bitwidth) + epsilon.
53+
If None, the computed metrics are used as is.
54+
custom_metric_fn (Callable): Function to compute a custom metric. As input gets the model_mp and returns a
55+
float value for metric. If None, uses interest point metric.
4056
57+
"""
4158
compute_distance_fn: Optional[Callable] = None
4259
distance_weighting_method: MpDistanceWeighting = MpDistanceWeighting.AVG
43-
custom_metric_fn: Optional[Callable] = None
4460
num_of_images: int = MP_DEFAULT_NUM_SAMPLES
4561
configuration_overwrite: Optional[List[int]] = None
4662
num_interest_points_factor: float = field(default=1.0, metadata={"description": "Should be between 0.0 and 1.0"})
@@ -49,6 +65,9 @@ class MixedPrecisionQuantizationConfig:
4965
refine_mp_solution: bool = True
5066
metric_normalization_threshold: float = 1e10
5167
hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
68+
metric_normalization: MpMetricNormalization = MpMetricNormalization.NONE
69+
metric_epsilon: Optional[float] = 1e-6
70+
custom_metric_fn: Optional[Callable] = None
5271
_is_mixed_precision_enabled: bool = field(init=False, default=False)
5372

5473
def __post_init__(self):

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
MixedPrecisionSearchManager
2626
from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
2727
ResourceUtilization
28+
from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
2829
from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \
2930
greedy_solution_refinement_procedure
3031

@@ -78,11 +79,12 @@ def search_bit_width(graph: Graph,
7879

7980
# Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
8081
# even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
81-
se = fw_impl.get_sensitivity_evaluator(
82+
se = SensitivityEvaluation(
8283
graph,
8384
mp_config,
8485
representative_data_gen=representative_data_gen,
8586
fw_info=fw_info,
87+
fw_impl=fw_impl,
8688
disable_activation_for_metric=disable_activation_for_metric,
8789
hessian_info_service=hessian_info_service)
8890

@@ -96,10 +98,11 @@ def search_bit_width(graph: Graph,
9698

9799
# Search manager and LP are highly coupled, so LP search method was moved inside search manager.
98100
search_manager = MixedPrecisionSearchManager(graph,
99-
fw_info,
100-
fw_impl,
101-
se,
102-
target_resource_utilization)
101+
fw_info=fw_info,
102+
fw_impl=fw_impl,
103+
sensitivity_evaluator=se,
104+
target_resource_utilization=target_resource_utilization,
105+
mp_config=mp_config)
103106
nodes_bit_cfg = search_manager.search()
104107

105108
graph.skip_validation_check = False

0 commit comments

Comments
 (0)