From fb3a2e0ed7b9b40271cf441a237a00dee47bf952 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 2 Mar 2025 16:57:53 +0200
Subject: [PATCH 01/12] move virtual graph creation and search method call
 inside MP manager

---
 .../core/common/graph/base_graph.py           | 18 +++-
 .../core/common/graph/base_node.py            |  1 +
 .../mixed_precision_search_facade.py          | 43 +++------
 .../mixed_precision_search_manager.py         | 89 ++++++++++++-------
 .../search_methods/linear_programming.py      | 17 ++--
 .../solution_refinement_procedure.py          |  2 +-
 .../weights_mixed_precision_tests.py          | 12 +--
 7 files changed, 93 insertions(+), 89 deletions(-)

diff --git a/model_compression_toolkit/core/common/graph/base_graph.py b/model_compression_toolkit/core/common/graph/base_graph.py
index cb54aac0e..7914559e6 100644
--- a/model_compression_toolkit/core/common/graph/base_graph.py
+++ b/model_compression_toolkit/core/common/graph/base_graph.py
@@ -706,14 +706,24 @@ def update_fused_nodes(self, fusion: List[Any]):
         """
         self.fused_nodes.append(fusion)
 
-    def is_single_activation_cfg(self):
+    def has_any_configurable_activation(self) -> bool:
         """
-        Checks whether all nodes in the graph that have activation quantization are quantized with the same bit-width.
+        Checks whether any node in the graph has a configurable activation quantization.
 
-        Returns: True if all quantization config candidates of all nodes have the same activation quantization bit-width.
+        Returns:
+            Whether any node in the graph has a configurable activation quantization.
+        """
+        return any([n.has_configurable_activation() for n in self.nodes])
+
+    def has_any_configurable_weights(self):
+        """
+        Checks whether any node in the graph has any configurable weights quantization.
 
+        Returns:
+            Whether any node in the graph has any configurable weights quantization.
         """
-        return all([n.is_all_activation_candidates_equal() for n in self.nodes])
+
+        return any([n.has_any_configurable_weight() for n in self.nodes])
 
     def replace_node(self, node_to_replace: BaseNode, new_node: BaseNode):
         """
diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py
index 1dfd1e533..d867fe578 100644
--- a/model_compression_toolkit/core/common/graph/base_node.py
+++ b/model_compression_toolkit/core/common/graph/base_node.py
@@ -170,6 +170,7 @@ def is_configurable_weight(self, attr_name: str) -> bool:
     def has_any_configurable_weight(self) -> bool:
         """
         Check whether any of the node's weights is configurable.
+        
         Returns:
             Whether any of the node's weights is configurable.
         """
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
index 9a473cad0..41c1fdb32 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
@@ -34,16 +34,10 @@
 
 
 class BitWidthSearchMethod(Enum):
-    # When adding a new search_methods MP configuration method, these enum and factory dictionary
-    # should be updated with it's kind and a search_method implementation.
     INTEGER_PROGRAMMING = 0
 
 
-search_methods = {
-    BitWidthSearchMethod.INTEGER_PROGRAMMING: mp_integer_programming_search}
-
-
-def search_bit_width(graph_to_search_cfg: Graph,
+def search_bit_width(graph: Graph,
                      fw_info: FrameworkInfo,
                      fw_impl: FrameworkImplementation,
                      target_resource_utilization: ResourceUtilization,
@@ -60,7 +54,7 @@ def search_bit_width(graph_to_search_cfg: Graph,
     target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now.
 
     Args:
-        graph_to_search_cfg: Graph to search a MP configuration for.
+        graph: Graph to search a MP configuration for.
         fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
         fw_impl: FrameworkImplementation object with specific framework methods implementation.
         target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
@@ -75,51 +69,34 @@ def search_bit_width(graph_to_search_cfg: Graph,
         bit-width index on the node).
 
     """
-
-    # target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now.
-    if target_resource_utilization is None:
-        Logger.critical("Target ResourceUtilization is required for the bit-width search method's configuration.")  # pragma: no cover
-
-    # Set graph for MP search
-    graph = copy.deepcopy(graph_to_search_cfg)  # Copy graph before searching
-    if target_resource_utilization.bops_restricted():
-        # TODO: we only need the virtual graph is both activations and weights are configurable
-        # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search
-        graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling())
-
     # If we only run weights compression with MP than no need to consider activation quantization when computing the
     # MP metric (it adds noise to the computation)
     tru = target_resource_utilization
     weight_only_restricted = tru.weight_restricted() and not (tru.activation_restricted() or
                                                               tru.total_mem_restricted() or
                                                               tru.bops_restricted())
-    disable_activation_for_metric = weight_only_restricted or graph_to_search_cfg.is_single_activation_cfg()
+    disable_activation_for_metric = weight_only_restricted or not graph.has_any_configurable_activation()
 
     # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
     # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
     se = fw_impl.get_sensitivity_evaluator(
-        graph_to_search_cfg,
+        graph,
         mp_config,
         representative_data_gen=representative_data_gen,
         fw_info=fw_info,
         disable_activation_for_metric=disable_activation_for_metric,
         hessian_info_service=hessian_info_service)
 
-    # Instantiate a manager object
+    if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING:
+        raise NotImplementedError()
+
+    # Search manager and LP are highly coupled, so LP search method was moved inside search manager.
     search_manager = MixedPrecisionSearchManager(graph,
                                                  fw_info,
                                                  fw_impl,
                                                  se,
-                                                 target_resource_utilization,
-                                                 original_graph=graph_to_search_cfg)
-
-    if search_method not in search_methods:
-        raise NotImplementedError()  # pragma: no cover
-
-    search_method_fn = search_methods[search_method]
-    # Search for the desired mixed-precision configuration
-    result_bit_cfg = search_method_fn(search_manager,
-                                      target_resource_utilization)
+                                                 target_resource_utilization)
+    result_bit_cfg = search_manager.search()
 
     if mp_config.refine_mp_solution:
         result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_resource_utilization)
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index 862896197..333a2a71e 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import copy
 
 from typing import Callable, Dict, List
 
@@ -30,6 +31,7 @@
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
     MixedPrecisionRUHelper
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
+from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
 from model_compression_toolkit.logger import Logger
 
 
@@ -43,8 +45,7 @@ def __init__(self,
                  fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
-                 target_resource_utilization: ResourceUtilization,
-                 original_graph: Graph = None):
+                 target_resource_utilization: ResourceUtilization):
         """
 
         Args:
@@ -54,18 +55,21 @@ def __init__(self,
             sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
                 a bit-width configuration for the MP model.
             target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
-            original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument
-                will contain the original graph (for config reconstruction purposes).
         """
 
-        self.graph = graph
-        self.original_graph = graph if original_graph is None else original_graph
         self.fw_info = fw_info
         self.fw_impl = fw_impl
+
+        self.original_graph = graph
+        # graph for mp search
+        self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization)
+
         self.sensitivity_evaluator = sensitivity_evaluator
+        self.compute_metric_fn = sensitivity_evaluator.compute_metric
+        self.target_resource_utilization = target_resource_utilization
+
+        self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
         self.layer_to_bitwidth_mapping = self.get_search_space()
-        self.compute_metric_fn = self.get_sensitivity_metric()
-        self._cuts = None
 
         # To define RU Total constraints we need to compute weights and activations even if they have no constraints
         # TODO currently this logic is duplicated in linear_programming.py
@@ -74,16 +78,53 @@ def __init__(self,
             targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
         self.ru_targets_to_compute = targets
 
-        self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl)
-        self.target_resource_utilization = target_resource_utilization
-        self.min_ru_config = self.graph.get_min_candidates_config(fw_info)
-        self.max_ru_config = self.graph.get_max_candidates_config(fw_info)
+        self.ru_helper = MixedPrecisionRUHelper(self.mp_graph, fw_info, fw_impl)
+
+        self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info)
+        self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info)
         self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
         self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
 
-        self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.graph,
+        self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph,
                                                                        original_graph=self.original_graph)
 
+    def search(self):
+        """
+        Run mixed precision search.
+
+        Returns:
+            Indices of the selected bit-widths candidates.
+        """
+        # import here to prevent circular dependency
+        from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
+            mp_integer_programming_search
+        config = mp_integer_programming_search(self, self.target_resource_utilization)
+        if self.mp_graph is self.original_graph:
+            return config
+
+        return self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
+
+    def _get_mp_graph(self, graph, target_resource_utilization):
+        """
+        Get graph for mixed precision search. Virtual graph is built if bops is restricted and both activation and
+        weights are configurable.
+
+        Args:
+            graph: input graph.
+            target_resource_utilization: target resource utilization.
+
+        Returns:
+            A tuple of the graph for mixed precision search (virtual or original) and a flag indicating whether a virtual graph was built.
+        """
+        if (target_resource_utilization.bops_restricted() and
+                graph.has_any_configurable_activation() and
+                graph.has_any_configurable_weights()):
+            mp_graph = substitute(copy.deepcopy(graph),
+                                  self.fw_impl.get_substitutions_virtual_weights_activation_coupling())
+            return mp_graph, True
+
+        return graph, False
+
     def get_search_space(self) -> Dict[int, List[int]]:
         """
         The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces
@@ -94,26 +135,12 @@ def get_search_space(self) -> Dict[int, List[int]]:
         """
 
         indices_mapping = {}
-        nodes_to_configure = self.graph.get_configurable_sorted_nodes(self.fw_info)
-        for idx, n in enumerate(nodes_to_configure):
+        for idx, n in enumerate(self.mp_topo_configurable_nodes):
             # For each node, get all possible bitwidth indices for it
             # (which is a list from 0 to the length of the candidates mp_config list of the node).
             indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg)))  # all search_methods space
         return indices_mapping
 
-    def get_sensitivity_metric(self) -> Callable:
-        """
-
-        Returns: Return a function (from the framework implementation) to compute a metric that
-        indicates the similarity of the mixed-precision model (to the float model) for a given
-        mixed-precision configuration.
-
-        """
-        # Get from the framework an evaluation function on how a MP configuration,
-        # affects the expected loss.
-
-        return self.sensitivity_evaluator.compute_metric
-
     def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
         """
         Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
@@ -126,12 +153,8 @@ def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
             A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
             depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
         """
-        assert isinstance(target, RUTarget), f"{target} is not a valid resource target"
-
-        configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info)
-
         ru_matrix = []
-        for c, c_n in enumerate(configurable_sorted_nodes):
+        for c, c_n in enumerate(self.mp_topo_configurable_nodes):
             for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
                 if candidate_idx == self.min_ru_config[c]:
                     candidate_rus = self.min_ru[target]
diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
index 1a3b2102c..e6b772f68 100644
--- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
+++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -27,7 +27,7 @@
 
 
 def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                  target_resource_utilization: ResourceUtilization = None) -> np.ndarray:
+                                  target_resource_utilization: ResourceUtilization) -> List[int]:
     """
     Searching and returning a mixed-precision configuration using an ILP optimization solution.
     It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
@@ -44,17 +44,13 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
         consumption).
 
     Returns:
-        The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node).
+        The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
 
     """
 
     # Build a mapping from each layer's index (in the model) to a dictionary that maps the
     # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
 
-    if target_resource_utilization is None or search_manager is None:
-        Logger.critical("Invalid parameters: 'target_resource_utilization' and 'search_manager' must not be 'None' "
-                        "for mixed-precision search. Ensure valid inputs are provided.")
-
     layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization)
 
     # Init variables to find their values when solving the lp problem.
@@ -82,10 +78,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
          in layer_to_indicator_vars_mapping.values()]
     ).flatten()
 
-    if target_resource_utilization.bops_restricted():
-        return search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
-    else:
-        return config
+    return config.tolist()
 
 
 def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
@@ -289,7 +282,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
     Logger.info('Starting to evaluate metrics')
     layer_to_metrics_mapping = {}
 
-    if target_resource_utilization.bops_restricted():
+    if search_manager.using_virtual_graph:
         origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
         max_config_value = search_manager.compute_metric_fn(origin_max_config)
     else:
@@ -310,7 +303,7 @@ def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
             mp_model_configuration[node_idx] = bitwidth_idx
 
             # Build a distance matrix using the function we got from the framework implementation.
-            if target_resource_utilization.bops_restricted():
+            if search_manager.using_virtual_graph:
                 # Reconstructing original graph's configuration from virtual graph's configuration
                 origin_mp_model_configuration = \
                     search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
diff --git a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py
index 16c49ad53..7277d662e 100644
--- a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py
+++ b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py
@@ -63,7 +63,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int],
                 # layer has max config in the given solution, nothing to optimize
                 continue
 
-            current_node = search_manager.graph.get_configurable_sorted_nodes(search_manager.fw_info)[node_idx]
+            current_node = search_manager.mp_topo_configurable_nodes[node_idx]
             node_candidates = current_node.candidates_quantization_cfg
 
             # only weights kernel attribute is quantized with weights mixed precision
diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py
index 252a38d9a..5e4c2401a 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py
@@ -259,7 +259,7 @@ def get_resource_utilization(self):
 
     def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
-        assert (quantization_info.mixed_precision_cfg == [1, 1]).all()
+        assert quantization_info.mixed_precision_cfg == [1, 1]
         for i in range(32):  # quantized per channel
             self.unit_test.assertTrue(
                 np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16)
@@ -300,7 +300,7 @@ def create_networks(self):
 
     def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
-        self.unit_test.assertTrue((quantization_info.mixed_precision_cfg != 0).any())
+        self.unit_test.assertTrue(any(i for i in quantization_info.mixed_precision_cfg))
 
         for i in range(32):  # quantized per channel
             self.unit_test.assertTrue(
@@ -325,7 +325,7 @@ def get_resource_utilization(self):
 
     def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
-        assert (quantization_info.mixed_precision_cfg == [2, 2]).all()
+        assert quantization_info.mixed_precision_cfg == [2, 2]
         for i in range(32):  # quantized per channel
             self.unit_test.assertTrue(
                 np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 4)
@@ -443,7 +443,7 @@ def get_resource_utilization(self):
 
     def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
-        assert (quantization_info.mixed_precision_cfg == [0, 1]).all()
+        assert quantization_info.mixed_precision_cfg == [0, 1]
         for i in range(32):  # quantized per channel
             self.unit_test.assertTrue(
                 np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256)
@@ -466,8 +466,8 @@ def get_resource_utilization(self):
 
     def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None):
         conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D)
-        assert any([(quantization_info.mixed_precision_cfg == [1, 0]).all(),
-                    (quantization_info.mixed_precision_cfg == [0, 1]).all()])
+        assert any([quantization_info.mixed_precision_cfg == [1, 0],
+                    quantization_info.mixed_precision_cfg == [0, 1]])
         for i in range(32):  # quantized per channel
             self.unit_test.assertTrue(
                 np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256)

From d663f28256d248040102c8c25fd48d9134ee4632 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 2 Mar 2025 19:40:17 +0200
Subject: [PATCH 02/12] fix test

---
 .../feature_networks_tests/feature_networks/qat/qat_test.py   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
index d9bbb6b8b..d3e343fe6 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
@@ -306,9 +306,9 @@ def run_test(self, **kwargs):
 
         self.compare(qat_ready_model, quantization_info)
 
-    def compare(self, qat_ready_model, quantization_info):
+    def _compare(self, qat_ready_model, quantization_info):
 
-        self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_mp_cfg))
+        self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_mp_cfg)
 
         # check that quantizer gets multiple bits configuration
         for layer in qat_ready_model.layers:

From 213270859e0b2242158767c4f4a389908ca5af2c Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 2 Mar 2025 19:41:09 +0200
Subject: [PATCH 03/12] remove unused imports

---
 .../mixed_precision_search_facade.py             | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
index 41c1fdb32..a6f737606 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
@@ -13,24 +13,20 @@
 # limitations under the License.
 # ==============================================================================
 
-import copy
 from enum import Enum
-import numpy as np
-from typing import List, Callable, Dict
+from typing import List, Callable
 
 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core.common import Graph
-from model_compression_toolkit.core.common.hessian import HessianInfoService
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
-from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
-    mp_integer_programming_search
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+from model_compression_toolkit.core.common.hessian import HessianInfoService
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \
+    MixedPrecisionSearchManager
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
+    ResourceUtilization, RUTarget
 from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \
     greedy_solution_refinement_procedure
-from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
-from model_compression_toolkit.logger import Logger
 
 
 class BitWidthSearchMethod(Enum):

From 617e4762031a2f35026fdcbc4ca68f9356b8f1eb Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 2 Mar 2025 19:44:00 +0200
Subject: [PATCH 04/12] move sensitivity computation from linear_programming to
 MP search manager

---
 .../mixed_precision_search_manager.py         | 73 ++++++++++++++++-
 .../search_methods/linear_programming.py      | 79 +------------------
 2 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index 333a2a71e..c245721c4 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -13,11 +13,13 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+from tqdm import tqdm
 
-from typing import Callable, Dict, List
+from typing import Dict, List
 
 import numpy as np
 
+from model_compression_toolkit.constants import EPS
 from model_compression_toolkit.core.common import BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
@@ -104,6 +106,75 @@ def search(self):
 
         return self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
 
+    def build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]:
+        """
+        This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
+        It builds a mapping from a node's index, to its bitwidth's effect on the model sensitivity.
+        For each node and some possible node's bitwidth (according to the given search space), we use
+        the framework function compute_metric_fn in order to infer
+        a batch of images, and compute (using the inference results) the sensitivity metric of
+        the configured mixed-precision model.
+
+        Args:
+            eps: Epsilon value to manually increase metric value (if necessary) for numerical stability
+
+        Returns:
+            Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to
+            the sensitivity of the model.
+
+        """
+
+        Logger.info('Starting to evaluate metrics')
+        layer_to_metrics_mapping = {}
+
+        if self.using_virtual_graph:
+            origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
+                self.max_ru_config)
+            max_config_value = self.compute_metric_fn(origin_max_config)
+        else:
+            max_config_value = self.compute_metric_fn(self.max_ru_config)
+
+        for node_idx, layer_possible_bitwidths_indices in tqdm(self.layer_to_bitwidth_mapping.items(),
+                                                               total=len(self.layer_to_bitwidth_mapping)):
+            layer_to_metrics_mapping[node_idx] = {}
+
+            for bitwidth_idx in layer_possible_bitwidths_indices:
+                if self.max_ru_config[node_idx] == bitwidth_idx:
+                    # This is a computation of the metric for the max configuration, assign pre-calculated value
+                    layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value
+                    continue
+
+                # Create a configuration that differs at one layer only from the baseline model
+                mp_model_configuration = self.max_ru_config.copy()
+                mp_model_configuration[node_idx] = bitwidth_idx
+
+                # Build a distance matrix using the function we got from the framework implementation.
+                if self.using_virtual_graph:
+                    # Reconstructing original graph's configuration from virtual graph's configuration
+                    origin_mp_model_configuration = \
+                        self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
+                            mp_model_configuration,
+                            changed_virtual_nodes_idx=[node_idx],
+                            original_base_config=origin_max_config)
+                    origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
+                                                    c != origin_mp_model_configuration[i]]
+                    metric_value = self.compute_metric_fn(
+                        origin_mp_model_configuration,
+                        origin_changed_nodes_indices,
+                        origin_max_config)
+                else:
+                    metric_value = self.compute_metric_fn(
+                        mp_model_configuration,
+                        [node_idx],
+                        self.max_ru_config)
+
+                layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps)
+
+        # Finalize distance metric mapping
+        self.finalize_distance_metric(layer_to_metrics_mapping)
+
+        return layer_to_metrics_mapping
+
     def _get_mp_graph(self, graph, target_resource_utilization):
         """
         Get graph for mixed precision search. Virtual graph is built if bops is restricted and both activation and
diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
index e6b772f68..3b3bbc0da 100644
--- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
+++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -15,8 +15,7 @@
 
 import numpy as np
 from pulp import *
-from tqdm import tqdm
-from typing import Dict, Tuple, Any, Optional
+from typing import Dict, Tuple, Any
 
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
@@ -51,7 +50,7 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
     # Build a mapping from each layer's index (in the model) to a dictionary that maps the
     # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
 
-    layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization)
+    layer_to_metrics_mapping = search_manager.build_sensitivity_mapping()
 
     # Init variables to find their values when solving the lp problem.
     layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping)
@@ -254,77 +253,3 @@ def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) ->
         return list(targets_ru_vec[target])
 
     raise ValueError(f'Unexpected target {target}.')    # pragma: no cover
-
-
-def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager,
-                                    target_resource_utilization: ResourceUtilization,
-                                    eps: float = EPS) -> Dict[int, Dict[int, float]]:
-    """
-    This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
-    It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity.
-    For each node and some possible node's bitwidth (according to the given search space), we use
-    the framework function compute_metric_fn in order to infer
-    a batch of images, and compute (using the inference results) the sensitivity metric of
-    the configured mixed-precision model.
-
-    Args:
-        search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-        target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations
-        (like model' weights memory consumption).
-        eps: Epsilon value to manually increase metric value (if necessary) for numerical stability
-
-    Returns:
-        Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to
-        the sensitivity of the model.
-
-    """
-
-    Logger.info('Starting to evaluate metrics')
-    layer_to_metrics_mapping = {}
-
-    if search_manager.using_virtual_graph:
-        origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config)
-        max_config_value = search_manager.compute_metric_fn(origin_max_config)
-    else:
-        max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config)
-
-    for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(),
-                                                           total=len(search_manager.layer_to_bitwidth_mapping)):
-        layer_to_metrics_mapping[node_idx] = {}
-
-        for bitwidth_idx in layer_possible_bitwidths_indices:
-            if search_manager.max_ru_config[node_idx] == bitwidth_idx:
-                # This is a computation of the metric for the max configuration, assign pre-calculated value
-                layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value
-                continue
-
-            # Create a configuration that differs at one layer only from the baseline model
-            mp_model_configuration = search_manager.max_ru_config.copy()
-            mp_model_configuration[node_idx] = bitwidth_idx
-
-            # Build a distance matrix using the function we got from the framework implementation.
-            if search_manager.using_virtual_graph:
-                # Reconstructing original graph's configuration from virtual graph's configuration
-                origin_mp_model_configuration = \
-                    search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
-                        mp_model_configuration,
-                        changed_virtual_nodes_idx=[node_idx],
-                        original_base_config=origin_max_config)
-                origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
-                                                c != origin_mp_model_configuration[i]]
-                metric_value = search_manager.compute_metric_fn(
-                    origin_mp_model_configuration,
-                    origin_changed_nodes_indices,
-                    origin_max_config)
-            else:
-                metric_value = search_manager.compute_metric_fn(
-                    mp_model_configuration,
-                    [node_idx],
-                    search_manager.max_ru_config)
-
-            layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps)
-
-    # Finalize distance metric mapping
-    search_manager.finalize_distance_metric(layer_to_metrics_mapping)
-
-    return layer_to_metrics_mapping

From 266f3f97296e254640aa0c865fe46d122851cdeb Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 2 Mar 2025 19:59:11 +0200
Subject: [PATCH 05/12] remove separate computation for non-configurable nodes

---
 .../mixed_precision_ru_helper.py              | 53 ++++++-------------
 .../mixed_precision_search_manager.py         |  1 -
 .../search_methods/linear_programming.py      | 12 +----
 3 files changed, 19 insertions(+), 47 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
index 400cbb9e0..2cfcef336 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
@@ -17,7 +17,7 @@
 import numpy as np
 
 from model_compression_toolkit.core import FrameworkInfo
-from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     RUTarget
@@ -36,7 +36,7 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple
         self.fw_impl = fw_impl
         self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
 
-    def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]:
+    def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]:
         """
         Compute utilization of requested targets for a specific configuration in the format expected by LP problem
         formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
@@ -51,7 +51,7 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
         """
 
         ru = {}
-        act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None)
+        act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg)
         if RUTarget.WEIGHTS in ru_targets:
             wu = self._weights_utilization(w_qcs)
             ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
@@ -71,7 +71,7 @@ def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[i
         return ru
 
     def get_quantization_candidates(self, mp_cfg) \
-            -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]:
+            -> Tuple[Dict[str, NodeActivationQuantizationConfig], Dict[str, NodeWeightsQuantizationConfig]]:
         """
         Retrieve quantization candidates objects for weights and activations from the configuration list.
 
@@ -88,70 +88,51 @@ def get_quantization_candidates(self, mp_cfg) \
         w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
         return act_qcs, w_qcs
 
-    def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]:
+    def _weights_utilization(self, w_qcs: Dict[str, NodeWeightsQuantizationConfig]) -> Dict[str, float]:
         """
-        Compute weights utilization for configurable weights if configuration is passed,
-        or for non-configurable nodes otherwise.
+        Compute weights utilization for configurable weights.
 
         Args:
-            w_qcs: nodes quantization configuration to compute, or None.
+            w_qcs: nodes quantization configuration to compute.
 
         Returns:
             Weight utilization per node.
         """
-        if w_qcs:
-            target_criterion = TargetInclusionCriterion.QConfigurable
-            bitwidth_mode = BitwidthMode.QCustom
-        else:
-            target_criterion = TargetInclusionCriterion.QNonConfigurable
-            bitwidth_mode = BitwidthMode.QDefaultSP
-
-        _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=target_criterion,
-                                                                          bitwidth_mode=bitwidth_mode,
+        _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=TargetInclusionCriterion.AnyQuantized,
+                                                                          bitwidth_mode=BitwidthMode.QCustom,
                                                                           w_qcs=w_qcs)
         nodes_util = {n: u.bytes for n, u in nodes_util.items()}
         return nodes_util
 
-    def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \
-            -> Optional[Dict[Any, float]]:
+    def _activation_utilization(self, act_qcs: Dict[str, NodeActivationQuantizationConfig]) -> Dict[Any, float]:
         """
-        Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
+        Compute activation utilization using MaxCut for all quantized nodes.
 
         Args:
-            act_qcs: nodes activation configuration or None.
+            act_qcs: nodes activation configuration.
 
         Returns:
-            Activation utilization per cut, or empty dict if no configuration was passed.
+            Activation utilization per cut.
         """
-        # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
-        # covered by the computation of configurable activations.
-        if not act_qcs:
-            return {}
-
         _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
             TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
         cuts_util = {c: u.bytes for c, u in cuts_util.items()}
         return cuts_util
 
     def _bops_utilization(self,
-                          act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]],
-                          w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> np.ndarray:
+                          act_qcs: Optional[Dict[str, NodeActivationQuantizationConfig]],
+                          w_qcs: Optional[Dict[str, NodeWeightsQuantizationConfig]]) -> np.ndarray:
         """
         Computes a resource utilization vector with the respective bit-operations (BOPS) count
         according to the given mixed-precision configuration.
 
         Args:
-            act_qcs: nodes activation configuration or None.
-            w_qcs: nodes quantization configuration to compute, or None.
-              Either both are provided, or both are None.
+            act_qcs: nodes activation configuration.
+            w_qcs: nodes quantization configuration to compute.
 
         Returns:
             A vector of node's BOPS count.
         """
-        assert [act_qcs, w_qcs].count(None) in [0, 2], 'act_qcs and w_qcs should both be provided or both be None.'
-        if act_qcs is None:
-            return np.array([])
-
         _, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom,
                                                            act_qcs=act_qcs, w_qcs=w_qcs)
         return np.array(list(detailed_bops.values()))
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index c245721c4..cbbbb3303 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -85,7 +85,6 @@ def __init__(self,
         self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info)
         self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
-        self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None)
 
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph,
                                                                        original_graph=self.original_graph)
diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
index 3b3bbc0da..73ba3f297 100644
--- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
+++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -162,8 +162,7 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
         _add_ru_constraints(search_manager=search_manager,
                             target_resource_utilization=target_resource_utilization,
                             indicators_matrix=indicators_matrix,
-                            lp_problem=lp_problem,
-                            non_conf_ru_dict=search_manager.non_conf_ru_dict)
+                            lp_problem=lp_problem)
     else:  # pragma: no cover
         Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
                         "A valid 'target_resource_utilization' is required.")
@@ -173,8 +172,7 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
 def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
                         target_resource_utilization: ResourceUtilization,
                         indicators_matrix: np.ndarray,
-                        lp_problem: LpProblem,
-                        non_conf_ru_dict: Dict[RUTarget, np.ndarray]):
+                        lp_problem: LpProblem):
     """
     Adding targets constraints for the Lp problem for the given target resource utilization.
     The update to the Lp problem object is done inplace.
@@ -184,7 +182,6 @@ def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
         target_resource_utilization: Target resource utilization.
         indicators_matrix: A diagonal matrix of the Lp problem's indicators.
         lp_problem: An Lp problem object to add constraint to.
-        non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets.
     """
     ru_indicated_vectors = {}
     # targets to add constraints for
@@ -203,11 +200,6 @@ def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
         # that would be required if that configuration is selected).
         # Each element in a vector is an lp object representing the configurations sum term for a memory element.
         ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
-
-        non_conf_ru_vec = non_conf_ru_dict[target]
-        if non_conf_ru_vec is not None and non_conf_ru_vec.size:
-            # add non-conf value as additional mem elements so that they get aggregated
-            ru_vec = np.concatenate([ru_vec, non_conf_ru_vec])
         ru_indicated_vectors[target] = ru_vec
 
     # Add constraints only for the restricted targets in target resource utilization.

From 5c619ad4ed82d05c7b6749a647f35016a5c89299 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Mon, 3 Mar 2025 09:27:33 +0200
Subject: [PATCH 06/12] fix tests

---
 .../feature_networks_tests/feature_networks/qat/qat_test.py   | 2 +-
 .../feature_models/mixed_precision_activation_test.py         | 2 +-
 .../feature_models/mixed_precision_weights_test.py            | 2 +-
 tests/pytorch_tests/model_tests/feature_models/qat_test.py    | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
index d3e343fe6..60b728065 100644
--- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
+++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py
@@ -306,7 +306,7 @@ def run_test(self, **kwargs):
 
         self.compare(qat_ready_model, quantization_info)
 
-    def _compare(self, qat_ready_model, quantization_info):
+    def compare(self, qat_ready_model, quantization_info):
 
         self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_mp_cfg)
 
diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
index dfc9edb13..9e2fbf6d0 100644
--- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py
@@ -72,7 +72,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info:
         raise NotImplementedError
 
     def verify_config(self, result_config, expected_config):
-        self.unit_test.assertTrue(all(result_config == expected_config),
+        self.unit_test.assertTrue(result_config == expected_config,
                                   f"Configuration mismatch: expected {expected_config} but got {result_config}.")
 
 
diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
index f09fb5b53..1dd065fe0 100644
--- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py
@@ -335,7 +335,7 @@ def get_resource_utilization(self):
         return ResourceUtilization(80)
 
     def compare(self, quantized_models, float_model, input_x=None, quantization_info=None):
-        self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_config))
+        self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_config)
 
 
 class MixedPrecisionWeightsTestNet(torch.nn.Module):
diff --git a/tests/pytorch_tests/model_tests/feature_models/qat_test.py b/tests/pytorch_tests/model_tests/feature_models/qat_test.py
index 73154073c..e650feb44 100644
--- a/tests/pytorch_tests/model_tests/feature_models/qat_test.py
+++ b/tests/pytorch_tests/model_tests/feature_models/qat_test.py
@@ -289,7 +289,7 @@ def run_test(self):
                      input_x=self.representative_data_gen(),
                      quantization_info=quantization_info)
 
-        self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0]))
+        self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0])
 
         # check that quantizer gets multiple bits configuration
         for _, layer in qat_ready_model.named_children():
@@ -336,7 +336,7 @@ def run_test(self):
                      quantization_info=quantization_info)
 
         # check that MP search doesn't return 8 bits configuration for all layers
-        self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0]))
+        self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0])
 
         # check that quantizer gets multiple bits configuration
         for _, layer in qat_ready_model.named_children():

From 6fdd3d646cad333246a2cdbde68328650637e69c Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Mon, 3 Mar 2025 15:29:35 +0200
Subject: [PATCH 07/12] simplify ru constraints construction in mixed precision

---
 .../mixed_precision_ru_helper.py              |  99 ++++----------
 .../mixed_precision_search_facade.py          |   2 +
 .../mixed_precision_search_manager.py         |  54 ++++----
 .../resource_utilization.py                   |  32 +++--
 .../search_methods/linear_programming.py      | 128 ++++++------------
 .../test_lp_search_bitwidth.py                | 119 ++++++----------
 .../test_tensorboard_writer.py                |   3 +-
 7 files changed, 167 insertions(+), 270 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
index 2cfcef336..56d969d1c 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Set, Dict, Optional, Tuple, Any
+from typing import List, Set, Dict, Optional, Tuple, Any, Union
 
 import numpy as np
 
@@ -38,37 +38,41 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple
 
     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]:
         """
-        Compute utilization of requested targets for a specific configuration in the format expected by LP problem
-        formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
-        (between calls).
+        Compute utilization of requested targets for a specific configuration:
+          for weights and bops targets - a single cumulative value,
+          for activation and total targets - one value per cut.
 
         Args:
             ru_targets: resource utilization targets to compute.
             mp_cfg: a list of candidates indices for configurable layers.
 
         Returns:
-            Dict of the computed utilization per target.
+            Dict mapping each requested target to its computed utilization, as a 1d vector.
         """
-
-        ru = {}
         act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg)
-        if RUTarget.WEIGHTS in ru_targets:
-            wu = self._weights_utilization(w_qcs)
-            ru[RUTarget.WEIGHTS] = np.array(list(wu.values()))
-
-        if RUTarget.ACTIVATION in ru_targets:
-            au = self._activation_utilization(act_qcs)
-            ru[RUTarget.ACTIVATION] = np.array(list(au.values()))
-
-        if RUTarget.BOPS in ru_targets:
-            ru[RUTarget.BOPS] = self._bops_utilization(act_qcs=act_qcs, w_qcs=w_qcs)
-
-        if RUTarget.TOTAL in ru_targets:
-            raise ValueError('Total target should be computed based on weights and activations targets.')
 
-        assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
-                                            f'Requested {ru_targets}')
-        return ru
+        ru, detailed_ru = self.ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized,
+                                                                          BitwidthMode.QCustom,
+                                                                          act_qcs=act_qcs,
+                                                                          w_qcs=w_qcs,
+                                                                          ru_targets=ru_targets,
+                                                                          allow_unused_qcs=True,
+                                                                          return_detailed=True)
+
+        ru_dict = {k: np.array([v]) for k, v in ru.get_resource_utilization_dict(restricted_only=True).items()}
+        # For activation and total we need utilization per cut, since the maximal cut may differ
+        # between mp configurations.
+        for target in [RUTarget.ACTIVATION, RUTarget.TOTAL]:
+            if target in ru_dict:
+                ru_dict[target] = np.array(list(detailed_ru[target].values()))
+
+        assert all(v.ndim == 1 for v in ru_dict.values())
+        if RUTarget.ACTIVATION in ru_targets and RUTarget.TOTAL in ru_targets:
+            assert ru_dict[RUTarget.ACTIVATION].shape == ru_dict[RUTarget.TOTAL].shape
+
+        assert len(ru_dict) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.'
+                                                 f'Requested {ru_targets}')
+        return ru_dict
 
     def get_quantization_candidates(self, mp_cfg) \
             -> Tuple[Dict[str, NodeActivationQuantizationConfig], Dict[str, NodeWeightsQuantizationConfig]]:
@@ -87,52 +91,3 @@ def get_quantization_candidates(self, mp_cfg) \
         act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()}
         w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()}
         return act_qcs, w_qcs
-
-    def _weights_utilization(self, w_qcs: Dict[str, NodeWeightsQuantizationConfig]) -> Dict[str, float]:
-        """
-        Compute weights utilization for configurable weights.
-
-        Args:
-            w_qcs: nodes quantization configuration to compute.
-
-        Returns:
-            Weight utilization per node.
-        """
-        _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=TargetInclusionCriterion.AnyQuantized,
-                                                                          bitwidth_mode=BitwidthMode.QCustom,
-                                                                          w_qcs=w_qcs)
-        nodes_util = {n: u.bytes for n, u in nodes_util.items()}
-        return nodes_util
-
-    def _activation_utilization(self, act_qcs: Dict[str, NodeActivationQuantizationConfig]) -> Dict[Any, float]:
-        """
-        Compute activation utilization using MaxCut for all quantized nodes.
-
-        Args:
-            act_qcs: nodes activation configuration.
-
-        Returns:
-            Activation utilization per cut.
-        """
-        _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut(
-            TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs)
-        cuts_util = {c: u.bytes for c, u in cuts_util.items()}
-        return cuts_util
-
-    def _bops_utilization(self,
-                          act_qcs: Optional[Dict[str, NodeActivationQuantizationConfig]],
-                          w_qcs: Optional[Dict[str, NodeWeightsQuantizationConfig]]) -> np.ndarray:
-        """
-        Computes a resource utilization vector with the respective bit-operations (BOPS) count
-        according to the given mixed-precision configuration.
-
-        Args:
-            act_qcs: nodes activation configuration.
-            w_qcs: nodes quantization configuration to compute.
-
-        Returns:
-            A vector of node's BOPS count.
-        """
-        _, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom,
-                                                           act_qcs=act_qcs, w_qcs=w_qcs)
-        return np.array(list(detailed_bops.values()))
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
index a6f737606..93beee95d 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
@@ -65,6 +65,8 @@ def search_bit_width(graph: Graph,
         bit-width index on the node).
 
     """
+    assert target_resource_utilization.is_any_restricted()
+
     # If we only run weights compression with MP than no need to consider activation quantization when computing the
     # MP metric (it adds noise to the computation)
     tru = target_resource_utilization
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index cbbbb3303..1750cdf1f 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 # ==============================================================================
 import copy
+from collections import defaultdict
+
 from tqdm import tqdm
 
 from typing import Dict, List
@@ -73,18 +75,12 @@ def __init__(self,
         self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
         self.layer_to_bitwidth_mapping = self.get_search_space()
 
-        # To define RU Total constraints we need to compute weights and activations even if they have no constraints
-        # TODO currently this logic is duplicated in linear_programming.py
-        targets = target_resource_utilization.get_restricted_targets()
-        if RUTarget.TOTAL in targets:
-            targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
-        self.ru_targets_to_compute = targets
-
+        self.ru_targets = target_resource_utilization.get_restricted_targets()
         self.ru_helper = MixedPrecisionRUHelper(self.mp_graph, fw_info, fw_impl)
 
         self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info)
         self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info)
-        self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config)
+        self.min_ru = self.ru_helper.compute_utilization(self.ru_targets, self.min_ru_config)
 
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph,
                                                                        original_graph=self.original_graph)
@@ -100,10 +96,10 @@ def search(self):
         from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
             mp_integer_programming_search
         config = mp_integer_programming_search(self, self.target_resource_utilization)
-        if self.mp_graph is self.original_graph:
-            return config
 
-        return self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
+        if self.using_virtual_graph:
+            config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
+        return config
 
     def build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]:
         """
@@ -211,32 +207,32 @@ def get_search_space(self) -> Dict[int, List[int]]:
             indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg)))  # all search_methods space
         return indices_mapping
 
-    def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray:
+    def compute_resource_utilization_matrices(self) -> Dict[RUTarget, np.ndarray]:
         """
-        Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization.
+        Computes and builds a resource utilization matrix for each restricted target, to be used for the
+        mixed-precision search problem formalization.
         Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0.
 
-        Args:
-            target: The resource target for which the resource utilization is calculated (a RUTarget value).
-
         Returns:
-            A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements
-            depends on the target, e.g. num nodes or num cuts, for which utilization is computed.
+            A dictionary containing resource utilization matrix of shape (num configurations, num memory elements)
+            per ru target. Num memory elements depends on the target, e.g. num cuts or 1 for cumulative metrics.
         """
-        ru_matrix = []
+        rus_per_candidate = defaultdict(list)
         for c, c_n in enumerate(self.mp_topo_configurable_nodes):
             for candidate_idx in range(len(c_n.candidates_quantization_cfg)):
                 if candidate_idx == self.min_ru_config[c]:
-                    candidate_rus = self.min_ru[target]
+                    candidate_rus = self.min_ru
                 else:
-                    candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target)
+                    candidate_rus = self.compute_ru_for_candidate(c, candidate_idx)
 
-                ru_matrix.append(np.asarray(candidate_rus))
+                for target, ru in candidate_rus.items():
+                    rus_per_candidate[target].append(ru)
 
-        np_ru_matrix = np.array(ru_matrix) - self.min_ru[target]    # num configurations X num elements
-        return np_ru_matrix
+        # Each target contains a matrix of num configurations X num elements
+        relative_rus = {target: np.array(ru) - self.min_ru[target] for target, ru in rus_per_candidate.items()}
+        return relative_rus
 
-    def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray:
+    def compute_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int) -> Dict[RUTarget, np.ndarray]:
         """
         Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal
         target configuration with the given candidate index.
@@ -244,13 +240,13 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int,
         Args:
             conf_node_idx: The index of a node in a sorted configurable nodes list.
             candidate_idx: Quantization config candidate to be used for the node's resource utilization computation.
-            target: The target for which the resource utilization is calculated (a RUTarget value).
 
-        Returns: Node's resource utilization vector.
+        Returns:
+            A dictionary containing the resource utilization vector per ru target.
 
         """
         cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx)
-        return self.ru_helper.compute_utilization({target}, cfg)[target]
+        return self.ru_helper.compute_utilization(self.ru_targets, cfg)
 
     @staticmethod
     def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]:
@@ -284,7 +280,7 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource
         act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config)
         ru = self.ru_helper.ru_calculator.compute_resource_utilization(
             target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs,
-            w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute, allow_unused_qcs=True)
+            w_qcs=w_qcs, ru_targets=self.ru_targets, allow_unused_qcs=True)
         return ru
 
     def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]):
diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py
index d2746da1b..afb03f06a 100644
--- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py
+++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py
@@ -51,25 +51,34 @@ class ResourceUtilization:
     bops: float = np.inf
 
     def weight_restricted(self):
-        return self.weights_memory < np.inf
+        return self._is_restricted(self.weights_memory)
 
     def activation_restricted(self):
-        return self.activation_memory < np.inf
+        return self._is_restricted(self.activation_memory)
 
     def total_mem_restricted(self):
-        return self.total_memory < np.inf
+        return self._is_restricted(self.total_memory)
 
     def bops_restricted(self):
-        return self.bops < np.inf
+        return self._is_restricted(self.bops)
 
-    def get_resource_utilization_dict(self) -> Dict[RUTarget, float]:
+    def get_resource_utilization_dict(self, restricted_only: bool = False) -> Dict[RUTarget, float]:
         """
-        Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target.
+        Get resource utilization as a dictionary.
+
+        Args:
+            restricted_only: whether to include only targets with restricted utilization.
+
+        Returns:
+            A dictionary containing the resource utilization with targets as keys.
         """
-        return {RUTarget.WEIGHTS: self.weights_memory,
-                RUTarget.ACTIVATION: self.activation_memory,
-                RUTarget.TOTAL: self.total_memory,
-                RUTarget.BOPS: self.bops}
+        ru_dict = {RUTarget.WEIGHTS: self.weights_memory,
+                   RUTarget.ACTIVATION: self.activation_memory,
+                   RUTarget.TOTAL: self.total_memory,
+                   RUTarget.BOPS: self.bops}
+        if restricted_only:
+            ru_dict = {k: v for k, v in ru_dict.items() if self._is_restricted(v)}
+        return ru_dict
 
     def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool:
         """
@@ -114,3 +123,6 @@ def get_summary_str(self, restricted: bool):
         if RUTarget.BOPS in targets:
             summary.append(f"BOPS: {self.bops}")
         return ', '.join(summary)
+
+    def _is_restricted(self, v):
+        return v < np.inf
diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
index 73ba3f297..331bd6b00 100644
--- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
+++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -50,14 +50,14 @@ def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
     # Build a mapping from each layer's index (in the model) to a dictionary that maps the
     # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
 
-    layer_to_metrics_mapping = search_manager.build_sensitivity_mapping()
+    layer_to_sensitivity_mapping = search_manager.build_sensitivity_mapping()
 
     # Init variables to find their values when solving the lp problem.
-    layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping)
+    layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_sensitivity_mapping)
 
     # Add all equations and inequalities that define the problem.
     lp_problem = _formalize_problem(layer_to_indicator_vars_mapping,
-                                    layer_to_metrics_mapping,
+                                    layer_to_sensitivity_mapping,
                                     layer_to_objective_vars_mapping,
                                     target_resource_utilization,
                                     search_manager)
@@ -150,28 +150,25 @@ def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVa
 
     # Bound the feasible solution space with the desired resource utilization values.
     # Creates separate constraints for weights utilization and activation utilization.
-    if target_resource_utilization is not None:
-        indicators = []
-        for layer in layer_to_metrics_mapping.keys():
-            for _, indicator in layer_to_indicator_vars_mapping[layer].items():
-                indicators.append(indicator)
-
-        indicators_arr = np.array(indicators)
-        indicators_matrix = np.diag(indicators_arr)
-
-        _add_ru_constraints(search_manager=search_manager,
-                            target_resource_utilization=target_resource_utilization,
-                            indicators_matrix=indicators_matrix,
-                            lp_problem=lp_problem)
-    else:  # pragma: no cover
-        Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. "
-                        "A valid 'target_resource_utilization' is required.")
+    assert target_resource_utilization and target_resource_utilization.is_any_restricted()
+
+    indicators = []
+    for layer in layer_to_metrics_mapping.keys():
+        for _, indicator in layer_to_indicator_vars_mapping[layer].items():
+            indicators.append(indicator)
+
+    indicators_vec = np.array(indicators)
+
+    _add_ru_constraints(search_manager=search_manager,
+                        target_resource_utilization=target_resource_utilization,
+                        indicators_vec=indicators_vec,
+                        lp_problem=lp_problem)
     return lp_problem
 
 
 def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
                         target_resource_utilization: ResourceUtilization,
-                        indicators_matrix: np.ndarray,
+                        indicators_vec: np.ndarray,
                         lp_problem: LpProblem):
     """
     Adding targets constraints for the Lp problem for the given target resource utilization.
@@ -180,68 +177,33 @@ def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
     Args:
         search_manager:  MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
         target_resource_utilization: Target resource utilization.
-        indicators_matrix: A diagonal matrix of the Lp problem's indicators.
+        indicators_vec: A vector of the Lp problem's indicators.
         lp_problem: An Lp problem object to add constraint to.
     """
-    ru_indicated_vectors = {}
-    # targets to add constraints for
-    constraints_targets = target_resource_utilization.get_restricted_targets()
-    # to add constraints for Total target we need to compute weight and activation
-    targets_to_compute = constraints_targets
-    if RUTarget.TOTAL in constraints_targets:
-        targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL}
-
-    for target in targets_to_compute:
-        ru_matrix = search_manager.compute_resource_utilization_matrix(target)    # num elements X num configurations
-        indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix)    # num elements X num configurations
-
-        # Sum the indicated values over all configurations, and add the value for minimal configuration once.
-        # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory
-        # that would be required if that configuration is selected).
-        # Each element in a vector is an lp object representing the configurations sum term for a memory element.
-        ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target]
-        ru_indicated_vectors[target] = ru_vec
-
-    # Add constraints only for the restricted targets in target resource utilization.
-    # Adding activation constraints modifies the lp term in ru_indicated_vectors, so if both activation and total
-    # are restricted we first add the constraints for total.
-    if RUTarget.TOTAL in constraints_targets and RUTarget.ACTIVATION in constraints_targets:
-        constraints_targets.remove(RUTarget.ACTIVATION)
-        constraints_targets = list(constraints_targets) + [RUTarget.ACTIVATION]
-    for target in constraints_targets:
-        target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target]
-        aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target)
-        for v in aggr_ru:
-            if isinstance(v, float):
-                if v > target_resource_utilization_value:
-                    Logger.critical(
-                        f"The model cannot be quantized to meet the specified target resource utilization {target.value} "
-                        f"with the value {target_resource_utilization_value}.")  # pragma: no cover
-            else:
-                lp_problem += v <= target_resource_utilization_value
-
-
-def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list:
-    """
-    Aggregate resource utilization values for the LP.
-
-    Args:
-        targets_ru_vec: resource utilization vectors for all precomputed targets.
-        target: resource utilization target.
-
-    Returns:
-        Aggregated resource utilization.
-    """
-    if target == RUTarget.TOTAL:
-        w = lpSum(targets_ru_vec[RUTarget.WEIGHTS])
-        act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION]
-        return [w + v for v in act_ru_vec]
-
-    if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
-        return [lpSum(targets_ru_vec[target])]
-
-    if target == RUTarget.ACTIVATION:
-        # for max aggregation, each value constitutes a separate constraint
-        return list(targets_ru_vec[target])
-
-    raise ValueError(f'Unexpected target {target}.')    # pragma: no cover
+    candidates_ru = search_manager.compute_resource_utilization_matrices()
+    min_ru = search_manager.min_ru
+    target_ru = target_resource_utilization.get_resource_utilization_dict(restricted_only=True)
+    assert candidates_ru.keys() == target_ru.keys()
+
+    for target, ru_matrix in candidates_ru.items():
+        # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
+        # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration)
+        assert ru_matrix.ndim == 2
+        if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
+            assert ru_matrix.shape[1] == 1
+
+        # ru values are relative to the minimal configuration, so we adjust the target ru accordingly
+        ru_constraint = target_ru[target] - min_ru[target]
+        if any(ru_constraint < 0):
+            raise ValueError(f"The model cannot be quantized to meet the specified target resource utilization "
+                             f"{target.value} with the value {target_ru[target]}.")
+
+        indicated_ru_matrix = ru_matrix.T * indicators_vec
+        # build lp sum term over all candidates
+        ru_vec = indicated_ru_matrix.sum(axis=1)
+
+        # For cumulative metrics a single constraint is added, for max metrics a separate constraint
+        # is added for each memory element (each element <= target => max element <= target).
+        assert len(ru_vec) == len(ru_constraint)
+        for v, c in zip(ru_vec, ru_constraint):
+            lp_problem += v <= c
diff --git a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
index 36de90950..d304b4f69 100644
--- a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
+++ b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
@@ -12,12 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from unittest.mock import Mock
+
 import numpy as np
 import unittest
 
 import keras
 from model_compression_toolkit.core import DEFAULTCONFIG
 from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \
+    MixedPrecisionSearchManager
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     ResourceUtilization, RUTarget
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
@@ -57,32 +61,31 @@ def reconstruct_config_from_virtual_graph(self,
 
 
 class MockMixedPrecisionSearchManager:
-    def __init__(self, layer_to_ru_mapping):
+    def __init__(self, layer_to_ru_mapping, ru_targets):
+        self.ru_targets = ru_targets
         self.layer_to_bitwidth_mapping = {0: [0, 1, 2]}
         self.layer_to_ru_mapping = layer_to_ru_mapping
-        self.compute_metric_fn = lambda x, y=None, z=None: {0: 2, 1: 1, 2: 0}[x[0]]
-        self.min_ru = {RUTarget.WEIGHTS: [1],
-                       RUTarget.ACTIVATION: [1],
-                       RUTarget.BOPS: [1]}  # minimal resource utilization in the tests layer_to_ru_mapping
+        self.min_ru = {t: np.array([1]) for t in ru_targets} # minimal resource utilization in the tests layer_to_ru_mapping
 
         self.max_ru_config = [0]
         self.config_reconstruction_helper = MockReconstructionHelper()
-        self.non_conf_ru_dict = {RUTarget.WEIGHTS: None, RUTarget.ACTIVATION: None, RUTarget.BOPS: None}
 
-    def compute_resource_utilization_matrix(self, target):
-        # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test)
-        if target == RUTarget.WEIGHTS:
-            ru_matrix = [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))]
-        elif target == RUTarget.ACTIVATION:
-            ru_matrix = [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))]
-        elif target == RUTarget.BOPS:
-            ru_matrix = [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))]
-        else:
-            raise ValueError('Not supposed to get here')
-        return np.array(ru_matrix).T
+    def build_sensitivity_mapping(self):
+        return {0: {0: 0, 1: 1, 2: 2}}
 
-    def finalize_distance_metric(self, d):
-        return d
+    def compute_resource_utilization_matrices(self):
+        # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test)
+        ru = {
+            RUTarget.WEIGHTS:
+                [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))],
+            RUTarget.ACTIVATION:
+                [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))],
+            RUTarget.BOPS:
+                [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))],
+            RUTarget.TOTAL:
+                [np.flip(np.array([ru.total_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))]
+        }
+        return {k: np.array(v).T for k, v in ru.items() if k in self.ru_targets}
 
 
 class TestLpSearchBitwidth(unittest.TestCase):
@@ -92,7 +95,7 @@ def test_search_weights_only(self):
         layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1),
                                    1: ResourceUtilization(weights_memory=2),
                                    0: ResourceUtilization(weights_memory=3)}}
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
+        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS})
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=target_resource_utilization)
@@ -106,42 +109,26 @@ def test_search_weights_only(self):
                                                     target_resource_utilization=target_resource_utilization)
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=ResourceUtilization(weights_memory=np.inf))
+                                                target_resource_utilization=ResourceUtilization(weights_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
-        self.assertTrue(bit_cfg[0] == 0)  # ResourceUtilization is Inf so expecting for the maximal bit-width result
+        self.assertTrue(bit_cfg[0] == 0)  # expecting for the maximal bit-width result
 
         target_resource_utilization = None  # target ResourceUtilization is not defined!
         with self.assertRaises(Exception):
-            bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                    target_resource_utilization=target_resource_utilization)
-
-    def test_search_weights_only_with_non_conf(self):
-        target_resource_utilization = ResourceUtilization(weights_memory=2+11)
-        layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1),
-                                   1: ResourceUtilization(weights_memory=2),
-                                   0: ResourceUtilization(weights_memory=3)}
-                               }
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
-        mock_search_manager.non_conf_ru_dict = {RUTarget.WEIGHTS: np.array([5, 6])}
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+            mp_integer_programming_search(mock_search_manager,
+                                          target_resource_utilization=target_resource_utilization)
 
-        self.assertTrue(len(bit_cfg) == 1)
-        self.assertTrue(bit_cfg[0] == 1)
-
-        # make sure non_conf was taken into account and lower target has a different solution
-        target_resource_utilization = ResourceUtilization(weights_memory=2 + 10.9)
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
-        self.assertFalse(bit_cfg[0] == 1)
+        with self.assertRaises(Exception):
+            mp_integer_programming_search(mock_search_manager,
+                                          target_resource_utilization=ResourceUtilization(weights_memory=np.inf))
 
     def test_search_activation_only(self):
         target_resource_utilization = ResourceUtilization(activation_memory=2)
         layer_to_ru_mapping = {0: {2: ResourceUtilization(activation_memory=1),
                                    1: ResourceUtilization(activation_memory=2),
                                    0: ResourceUtilization(activation_memory=3)}}
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
+        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.ACTIVATION})
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=target_resource_utilization)
@@ -156,17 +143,17 @@ def test_search_activation_only(self):
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=ResourceUtilization(
-                                                    activation_memory=np.inf))
+                                                    activation_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
-        self.assertTrue(bit_cfg[0] == 0)  # ResourceUtilization is Inf so expecting for the maximal bit-width result
+        self.assertTrue(bit_cfg[0] == 0)  # expecting for the maximal bit-width result
 
     def test_search_weights_and_activation(self):
         target_resource_utilization = ResourceUtilization(weights_memory=2, activation_memory=2)
         layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1),
                                    1: ResourceUtilization(weights_memory=2, activation_memory=2),
                                    0: ResourceUtilization(weights_memory=3, activation_memory=3)}}
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
+        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS, RUTarget.ACTIVATION})
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=target_resource_utilization)
@@ -180,18 +167,18 @@ def test_search_weights_and_activation(self):
                                                     target_resource_utilization=target_resource_utilization)
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=ResourceUtilization(weights_memory=np.inf,
-                                                                                                activation_memory=np.inf))
+                                                target_resource_utilization=ResourceUtilization(weights_memory=1000,
+                                                                                                activation_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
-        self.assertTrue(bit_cfg[0] == 0)  # ResourceUtilization is Inf so expecting for the maximal bit-width result
+        self.assertTrue(bit_cfg[0] == 0)  # expecting for the maximal bit-width result
 
     def test_search_total_resource_utilization(self):
         target_resource_utilization = ResourceUtilization(total_memory=4)
-        layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1),
-                                   1: ResourceUtilization(weights_memory=2, activation_memory=2),
-                                   0: ResourceUtilization(weights_memory=3, activation_memory=3)}}
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
+        layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1, total_memory=2),
+                                   1: ResourceUtilization(weights_memory=2, activation_memory=2, total_memory=4),
+                                   0: ResourceUtilization(weights_memory=3, activation_memory=3, total_memory=6)}}
+        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.TOTAL})
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=target_resource_utilization)
@@ -204,7 +191,7 @@ def test_search_bops_ru(self):
         layer_to_ru_mapping = {0: {2: ResourceUtilization(bops=1),
                                    1: ResourceUtilization(bops=2),
                                    0: ResourceUtilization(bops=3)}}
-        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping)
+        mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.BOPS})
 
         bit_cfg = mp_integer_programming_search(mock_search_manager,
                                                 target_resource_utilization=target_resource_utilization)
@@ -272,32 +259,14 @@ def representative_data_gen():
                                              representative_data_gen,
                                              fw_info=fw_info)
 
-        cfg = search_bit_width(graph_to_search_cfg=graph,
+        cfg = search_bit_width(graph=graph,
                                fw_info=DEFAULT_KERAS_INFO,
                                fw_impl=keras_impl,
-                               target_resource_utilization=ResourceUtilization(np.inf),
+                               target_resource_utilization=ResourceUtilization(weights_memory=100),
                                mp_config=core_config.mixed_precision_config,
                                representative_data_gen=representative_data_gen,
                                search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING)
 
-        with self.assertRaises(Exception):
-            cfg = search_bit_width(graph_to_search_cfg=graph,
-                                   fw_info=DEFAULT_KERAS_INFO,
-                                   fw_impl=keras_impl,
-                                   target_resource_utilization=ResourceUtilization(np.inf),
-                                   mp_config=core_config.mixed_precision_config,
-                                   representative_data_gen=representative_data_gen,
-                                   search_method=None)
-
-        with self.assertRaises(Exception):
-            cfg = search_bit_width(graph_to_search_cfg=graph,
-                                   fw_info=DEFAULT_KERAS_INFO,
-                                   fw_impl=keras_impl,
-                                   target_resource_utilization=None,
-                                   mp_config=core_config.mixed_precision_config,
-                                   representative_data_gen=representative_data_gen,
-                                   search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING)
-
     def test_mixed_precision_search_facade(self):
         core_config_avg_weights = CoreConfig(quantization_config=DEFAULTCONFIG,
                                              mixed_precision_config=MixedPrecisionQuantizationConfig(compute_mse,
diff --git a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py
index 120ef70a9..7c830a4b6 100644
--- a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py
+++ b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py
@@ -162,7 +162,8 @@ def plot_tensor_sizes(self, core_config):
                                           fqc=fqc,
                                           network_editor=[],
                                           quant_config=cfg,
-                                          target_resource_utilization=mct.core.ResourceUtilization(),
+                                          target_resource_utilization=mct.core.ResourceUtilization(weights_memory=73,
+                                                                                                   activation_memory=191),
                                           n_iter=1,
                                           analyze_similarity=True,
                                           mp_cfg=mp_cfg)

From 91f2aa5010a54292d853569460d2cbeaff2b3bae Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Tue, 4 Mar 2025 18:50:40 +0200
Subject: [PATCH 08/12] fix test

---
 .../core/mixed_precision/test_greedy_solution_refinement.py   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py
index 5a5fbeee5..7d29842af 100644
--- a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py
+++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py
@@ -25,7 +25,7 @@
 @pytest.fixture
 def search_manager():
     manager = Mock()
-    manager.graph.get_configurable_sorted_nodes = MagicMock()
+    manager.mp_topo_configurable_nodes = MagicMock()
     manager.fw_info.get_kernel_op_attributes = MagicMock()
     manager.replace_config_in_index = MagicMock(
         side_effect=lambda config, idx, candidate: (
@@ -105,7 +105,7 @@ def test_greedy_solution_refinement_procedure(
     node_mock = Mock()
     node_mock.candidates_quantization_cfg = candidate_configs(weight_bits_dict_0, act_bits_0, weight_bits_dict_1, act_bits_1)
 
-    search_manager.graph.get_configurable_sorted_nodes.return_value = [node_mock]
+    search_manager.mp_topo_configurable_nodes = [node_mock]
 
     search_manager.compute_resource_utilization_for_config = MagicMock(side_effect=lambda config: {
         0: ResourceUtilization(**alternative_candidate_resources_usage),

From 56665422de428325b474e2ea0a61869a66d6a363 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 9 Mar 2025 18:30:42 +0200
Subject: [PATCH 09/12] tiny updates

---
 .../core/common/graph/base_node.py                   |  1 -
 .../mixed_precision/mixed_precision_ru_helper.py     |  2 +-
 .../mixed_precision/mixed_precision_search_facade.py |  2 +-
 .../mixed_precision_search_manager.py                | 12 +++++++-----
 .../resource_utilization_calculator.py               |  6 +-----
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/model_compression_toolkit/core/common/graph/base_node.py b/model_compression_toolkit/core/common/graph/base_node.py
index d867fe578..1dfd1e533 100644
--- a/model_compression_toolkit/core/common/graph/base_node.py
+++ b/model_compression_toolkit/core/common/graph/base_node.py
@@ -170,7 +170,6 @@ def is_configurable_weight(self, attr_name: str) -> bool:
     def has_any_configurable_weight(self) -> bool:
         """
         Check whether any of the node's weights is configurable.
-        
         Returns:
             Whether any of the node's weights is configurable.
         """
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
index 56d969d1c..4bd9134bb 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import List, Set, Dict, Optional, Tuple, Any, Union
+from typing import List, Set, Dict, Tuple
 
 import numpy as np
 
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
index 93beee95d..4189cc37a 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
@@ -24,7 +24,7 @@
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \
     MixedPrecisionSearchManager
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
-    ResourceUtilization, RUTarget
+    ResourceUtilization
 from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \
     greedy_solution_refinement_procedure
 
diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index 1750cdf1f..5ec783b11 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -67,9 +67,9 @@ def __init__(self,
         self.original_graph = graph
         # graph for mp search
         self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization)
+        del graph  # so that it's not used by mistake
 
         self.sensitivity_evaluator = sensitivity_evaluator
-        self.compute_metric_fn = sensitivity_evaluator.compute_metric
         self.target_resource_utilization = target_resource_utilization
 
         self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
@@ -93,6 +93,7 @@ def search(self):
             Indices of the selected bit-widths candidates.
         """
         # import here to prevent circular dependency
+        # TODO: remove search manager dependency from linear_programming
         from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
             mp_integer_programming_search
         config = mp_integer_programming_search(self, self.target_resource_utilization)
@@ -122,12 +123,13 @@ def build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, flo
         Logger.info('Starting to evaluate metrics')
         layer_to_metrics_mapping = {}
 
+        compute_metric = self.sensitivity_evaluator.compute_metric
         if self.using_virtual_graph:
             origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(
                 self.max_ru_config)
-            max_config_value = self.compute_metric_fn(origin_max_config)
+            max_config_value = compute_metric(origin_max_config)
         else:
-            max_config_value = self.compute_metric_fn(self.max_ru_config)
+            max_config_value = compute_metric(self.max_ru_config)
 
         for node_idx, layer_possible_bitwidths_indices in tqdm(self.layer_to_bitwidth_mapping.items(),
                                                                total=len(self.layer_to_bitwidth_mapping)):
@@ -153,12 +155,12 @@ def build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, flo
                             original_base_config=origin_max_config)
                     origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if
                                                     c != origin_mp_model_configuration[i]]
-                    metric_value = self.compute_metric_fn(
+                    metric_value = compute_metric(
                         origin_mp_model_configuration,
                         origin_changed_nodes_indices,
                         origin_max_config)
                 else:
-                    metric_value = self.compute_metric_fn(
+                    metric_value = compute_metric(
                         mp_model_configuration,
                         [node_idx],
                         self.max_ru_config)
diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
index 408e5a598..07f350d53 100644
--- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
+++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
@@ -431,8 +431,7 @@ def compute_node_activation_tensor_utilization(self,
         Returns:
             Node's activation utilization.
         """
-        if qc and bitwidth_mode != BitwidthMode.QCustom:
-            raise ValueError(self.unexpected_qc_error)
+        self._validate_custom_qcs(qc, bitwidth_mode)
 
         if target_criterion:
             # only check whether the node meets the criterion
@@ -470,9 +469,6 @@ def compute_bops(self,
             - Total BOPS count of the network.
             - Detailed BOPS count per node.
         """
-        self._validate_custom_qcs(act_qcs, bitwidth_mode)
-        self._validate_custom_qcs(w_qcs, bitwidth_mode)
-
         nodes_bops = {}
         for n in self.graph.get_topo_sorted_nodes():
             w_qc = w_qcs.get(n.name) if w_qcs else None

From 42aeae853dbd38955c7d6095159bfcbff230e111 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 16 Mar 2025 18:53:15 +0200
Subject: [PATCH 10/12] convert LP functions into class, remove dependency on
 MPSearchManager, call LP from MPSearchManager with precomputed metrics

---
 .../mixed_precision_search_manager.py         |  43 ++-
 .../search_methods/linear_programming.py      | 318 ++++++++----------
 2 files changed, 168 insertions(+), 193 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index 5ec783b11..124e0e317 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -34,6 +34,8 @@
     TargetInclusionCriterion, BitwidthMode
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \
     MixedPrecisionRUHelper
+from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
+    MixedPrecisionIntegerLPSolver
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
 from model_compression_toolkit.logger import Logger
@@ -85,24 +87,49 @@ def __init__(self,
         self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph,
                                                                        original_graph=self.original_graph)
 
-    def search(self):
+    def search(self) -> List[int]:
         """
         Run mixed precision search.
 
         Returns:
             Indices of the selected bit-widths candidates.
         """
-        # import here to prevent circular dependency
-        # TODO: remove search manager dependency from linear_programming
-        from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
-            mp_integer_programming_search
-        config = mp_integer_programming_search(self, self.target_resource_utilization)
+        candidates_sensitivity = self._build_sensitivity_mapping()
+        candidates_ru = self._compute_relative_ru_matrices()
+        rel_target_ru = self._get_relative_ru_constraint_per_mem_element()
+        solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, candidates_ru, rel_target_ru)
+        config = solver.run()
 
         if self.using_virtual_graph:
             config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config)
         return config
 
-    def build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]:
+    def _get_relative_ru_constraint_per_mem_element(self) -> Dict[RUTarget, np.ndarray]:
+        """
+        Computes the resource utilization constraints relative to the minimal bit configuration, i.e. the
+        constraint for each memory element is the difference between the target utilization and that
+        element's utilization under the min-bit configuration.
+
+        Returns:
+            A dictionary of relative resource utilization constraints per ru target.
+
+        Raises:
+            ValueError: if target resource utilization cannot be satisfied (utilization for the minimal bit
+              configuration exceeds the requested target utilization for any target).
+        """
+        target_ru = self.target_resource_utilization.get_resource_utilization_dict(restricted_only=True)
+        rel_target_ru = {
+            ru_target: ru - self.min_ru[ru_target] for ru_target, ru in target_ru.items()
+        }
+        unsatisfiable_targets = {
+            ru_target.value: target_ru[ru_target] for ru_target, ru in rel_target_ru.items() if any(ru < 0)
+        }
+        if unsatisfiable_targets:
+            raise ValueError(f"The model cannot be quantized to meet the specified resource utilization for the "
+                             f"following targets: {unsatisfiable_targets}")
+        return rel_target_ru
+
+    def _build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]:
         """
         This function measures the sensitivity of a change in a bitwidth of a layer on the entire model.
         It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity.
@@ -209,7 +236,7 @@ def get_search_space(self) -> Dict[int, List[int]]:
             indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg)))  # all search_methods space
         return indices_mapping
 
-    def compute_resource_utilization_matrices(self) -> Dict[RUTarget, np.ndarray]:
+    def _compute_relative_ru_matrices(self) -> Dict[RUTarget, np.ndarray]:
         """
         Computes and builds a resource utilization matrix for all restricted targets, to be used for the
         mixed-precision search problem formalization.
diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
index 331bd6b00..4e5155ad4 100644
--- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
+++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py
@@ -12,198 +12,146 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 import numpy as np
 from pulp import *
-from typing import Dict, Tuple, Any
+from typing import Dict, Tuple
 
-from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager
+from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget
 
 # Limit ILP solver runtime in seconds
 SOLVER_TIME_LIMIT = 60
 
 
-def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager,
-                                  target_resource_utilization: ResourceUtilization) -> List[int]:
-    """
-    Searching and returning a mixed-precision configuration using an ILP optimization solution.
-    It first builds a mapping from each layer's index (in the model) to a dictionary that maps the
-    bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
-    Then, it creates a mapping from each node's index (in the graph) to a dictionary
-    that maps the bitwidth index to the contribution of configuring this node with this
-    bitwidth to the minimal possible resource utilization of the model.
-    Then, and using these mappings, it builds an LP problem and finds an optimal solution.
-    If a solution could not be found, exception is thrown.
-
-    Args:
-        search_manager: MixedPrecisionSearchManager object to be used for problem formalization.
-        target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model' weights memory
-        consumption).
-
-    Returns:
-        The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
-
-    """
-
-    # Build a mapping from each layer's index (in the model) to a dictionary that maps the
-    # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer.
-
-    layer_to_sensitivity_mapping = search_manager.build_sensitivity_mapping()
-
-    # Init variables to find their values when solving the lp problem.
-    layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_sensitivity_mapping)
-
-    # Add all equations and inequalities that define the problem.
-    lp_problem = _formalize_problem(layer_to_indicator_vars_mapping,
-                                    layer_to_sensitivity_mapping,
-                                    layer_to_objective_vars_mapping,
-                                    target_resource_utilization,
-                                    search_manager)
-
-    # Use default PULP solver. Limit runtime in seconds
-    solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT)
-    lp_problem.solve(solver=solver)  # Try to solve the problem.
-
-    assert lp_problem.status == LpStatusOptimal, Logger.critical(
-        "No solution was found during solving the LP problem")
-    Logger.info(f"ILP status: {LpStatus[lp_problem.status]}")
-
-    # Take the bitwidth index only if its corresponding indicator is one.
-    config = np.asarray(
-        [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
-         nbits_to_indicator
-         in layer_to_indicator_vars_mapping.values()]
-    ).flatten()
-
-    return config.tolist()
-
-
-def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
-    Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
-    """
-    Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use,
-    and a variable for each indicator for whether we use the former variable or not.
-
-    Args:
-        layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the
-        bitwidth index to the observed sensitivity of the model.
-
-    Returns:
-        A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem,
-        and the second for indicators for each variable.
-    """
-
-    layer_to_indicator_vars_mapping = dict()
-    layer_to_objective_vars_mapping = dict()
-
-    for layer, nbits_to_metric in layer_to_metrics_mapping.items():
-        layer_to_indicator_vars_mapping[layer] = dict()
-
-        for nbits in nbits_to_metric.keys():
-            layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
-                                                                       lowBound=0,
-                                                                       upBound=1,
-                                                                       cat=LpInteger)
-
-        layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
-
-    return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
-
-
-def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVariable]],
-                       layer_to_metrics_mapping: Dict[int, Dict[int, float]],
-                       layer_to_objective_vars_mapping: Dict[int, LpVariable],
-                       target_resource_utilization: ResourceUtilization,
-                       search_manager: MixedPrecisionSearchManager) -> LpProblem:
-    """
-    Formalize the LP problem by defining all inequalities that define the solution space.
-
-    Args:
-        layer_to_indicator_vars_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to
-        indicator variable.
-        layer_to_metrics_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to sensitivity
-        evaluation.
-        layer_to_objective_vars_mapping: Dictionary that maps each node's index to a bitwidth variable we find its
-        value.
-        target_resource_utilization: Target resource utilization to reduce our feasible solution space.
-        search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-
-    Returns:
-        The formalized LP problem.
-    """
-
-    lp_problem = LpProblem()  # minimization problem by default
-    lp_problem += lpSum([layer_to_objective_vars_mapping[layer] for layer in
-                         layer_to_metrics_mapping.keys()])  # Objective (minimize acc loss)
-
-    for layer in layer_to_metrics_mapping.keys():
-        # Use every bitwidth for every layer with its indicator.
-        lp_problem += lpSum([indicator * layer_to_metrics_mapping[layer][nbits]
-                             for nbits, indicator in layer_to_indicator_vars_mapping[layer].items()]) == \
-                      layer_to_objective_vars_mapping[layer]
-
-        # Constraint of only one indicator==1
-        lp_problem += lpSum(
-            [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1
-
-    # Bound the feasible solution space with the desired resource utilization values.
-    # Creates separate constraints for weights utilization and activation utilization.
-    assert target_resource_utilization and target_resource_utilization.is_any_restricted()
-
-    indicators = []
-    for layer in layer_to_metrics_mapping.keys():
-        for _, indicator in layer_to_indicator_vars_mapping[layer].items():
-            indicators.append(indicator)
+class MixedPrecisionIntegerLPSolver:
+    """ Integer Linear Programming solver for Mixed Precision.
 
-    indicators_vec = np.array(indicators)
-
-    _add_ru_constraints(search_manager=search_manager,
-                        target_resource_utilization=target_resource_utilization,
-                        indicators_vec=indicators_vec,
-                        lp_problem=lp_problem)
-    return lp_problem
-
-
-def _add_ru_constraints(search_manager: MixedPrecisionSearchManager,
-                        target_resource_utilization: ResourceUtilization,
-                        indicators_vec: np.ndarray,
-                        lp_problem: LpProblem):
-    """
-    Adding targets constraints for the Lp problem for the given target resource utilization.
-    The update to the Lp problem object is done inplace.
-
-    Args:
-        search_manager:  MixedPrecisionSearchManager object to be used for resource utilization constraints formalization.
-        target_resource_utilization: Target resource utilization.
-        indicators_vec: A vector of the Lp problem's indicators.
-        lp_problem: An Lp problem object to add constraint to.
+        Args:
+            layer_to_sensitivity_mapping: sensitivity per candidate per layer.
+            candidates_ru: resource utilization per candidate.
+            ru_constraints: resource utilization constraints corresponding to 'candidates_ru'.
     """
-    candidates_ru = search_manager.compute_resource_utilization_matrices()
-    min_ru = search_manager.min_ru
-    target_ru = target_resource_utilization.get_resource_utilization_dict(restricted_only=True)
-    assert candidates_ru.keys() == target_ru.keys()
-
-    for target, ru_matrix in candidates_ru.items():
-        # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
-        # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration)
-        assert ru_matrix.ndim == 2
-        if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
-            assert ru_matrix.shape[1] == 1
-
-        # ru values are relative to the minimal configuration, so we adjust the target ru accordingly
-        ru_constraint = target_ru[target] - min_ru[target]
-        if any(ru_constraint < 0):
-            raise ValueError(f"The model cannot be quantized to meet the specified target resource utilization "
-                             f"{target.value} with the value {target_ru[target]}.")
-
-        indicated_ru_matrix = ru_matrix.T * indicators_vec
-        # build lp sum term over all candidates
-        ru_vec = indicated_ru_matrix.sum(axis=1)
-
-        # For cumulative metrics a single constraint is added, for max metrics a separate constraint
-        # is added for each memory element (each element < target => max element < target).
-        assert len(ru_vec) == len(ru_constraint)
-        for v, c in zip(ru_vec, ru_constraint):
-            lp_problem += v <= c
+    def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]],
+                 candidates_ru: Dict[RUTarget, np.ndarray],
+                 ru_constraints: Dict[RUTarget, np.ndarray]):
+        self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping
+        self.candidates_ru = candidates_ru
+        self.ru_constraints = ru_constraints
+
+        self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = (
+            self._init_problem_vars(layer_to_sensitivity_mapping))
+
+    def run(self) -> List[int]:
+        """
+        Build and solve an ILP optimization problem.
+
+        Returns:
+            The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node).
+
+        """
+        # Add all equations and inequalities that define the problem.
+        lp_problem = self._formalize_problem()
+
+        # Use default PULP solver. Limit runtime in seconds
+        solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT)
+        lp_problem.solve(solver=solver)  # Try to solve the problem.
+
+        if lp_problem.status != LpStatusOptimal:
+            raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}')
+
+        # Take the bitwidth index only if its corresponding indicator is one.
+        config = np.asarray(
+            [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for
+             nbits_to_indicator
+             in self.layer_to_indicator_vars_mapping.values()]
+        ).flatten()
+
+        return config.tolist()
+
+    @staticmethod
+    def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[
+        Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]:
+        """
+        Initialize the LP problem variables: a binary indicator variable for each bitwidth index of each layer
+        (whether that bitwidth is selected), and a non-negative objective variable for each layer.
+
+        Args:
+            layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the
+            bitwidth index to the observed sensitivity of the model.
+
+        Returns:
+            A tuple of two dictionaries: the first maps each layer to its indicator variables (one per bitwidth
+            index), and the second maps each layer to its objective variable.
+        """
+
+        layer_to_indicator_vars_mapping = dict()
+        layer_to_objective_vars_mapping = dict()
+
+        for layer, nbits_to_metric in layer_to_metrics_mapping.items():
+            layer_to_indicator_vars_mapping[layer] = dict()
+
+            for nbits in nbits_to_metric.keys():
+                layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}",
+                                                                           lowBound=0,
+                                                                           upBound=1,
+                                                                           cat=LpInteger)
+
+            layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0)
+
+        return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping
+
+    def _formalize_problem(self) -> LpProblem:
+        """
+        Formalize the LP problem by defining all inequalities that define the solution space.
+
+        Returns:
+            The formalized LP problem.
+        """
+
+        lp_problem = LpProblem()  # minimization problem by default
+        lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in
+                             self.layer_to_sensitivity_mapping.keys()])  # Objective (minimize acc loss)
+
+        for layer in self.layer_to_sensitivity_mapping.keys():
+            # Use every bitwidth for every layer with its indicator.
+            lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits]
+                                 for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \
+                          self.layer_to_objective_vars_mapping[layer]
+
+            # Constraint of only one indicator==1
+            lp_problem += lpSum(
+                [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1
+
+        # Bound the feasible solution space with the desired resource utilization values.
+        self._add_ru_constraints(lp_problem=lp_problem)
+
+        return lp_problem
+
+    def _add_ru_constraints(self, lp_problem: LpProblem):
+        """
+        Add the resource utilization constraints of each target in 'candidates_ru' to the LP problem.
+        The LP problem object is updated in place.
+
+        Args:
+            lp_problem: An Lp problem object to add constraint to.
+        """
+        indicators = []
+        for layer in self.layer_to_sensitivity_mapping:
+            indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values()))
+        indicators_vec = np.array(indicators)
+
+        for target, ru_matrix in self.candidates_ru.items():
+            # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall
+            # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration)
+            assert ru_matrix.ndim == 2
+            if target in [RUTarget.WEIGHTS, RUTarget.BOPS]:
+                assert ru_matrix.shape[1] == 1
+
+            indicated_ru_matrix = ru_matrix.T * indicators_vec
+            # build lp sum term over all candidates
+            ru_vec = indicated_ru_matrix.sum(axis=1)
+
+            # For cumulative metrics a single constraint is added, for max metrics a separate constraint
+            # is added for each memory element (each element <= target => max element <= target).
+            assert len(ru_vec) == len(self.ru_constraints[target])
+            for v, c in zip(ru_vec, self.ru_constraints[target]):
+                lp_problem += v <= c

From 07c94861b5226840cbd41627f2249848620c84f3 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 16 Mar 2025 20:15:27 +0200
Subject: [PATCH 11/12] fix lp_search test

---
 .../test_lp_search_bitwidth.py                | 56 ++++++++-----------
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
index d304b4f69..b9c94bde1 100644
--- a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
+++ b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py
@@ -20,8 +20,6 @@
 import keras
 from model_compression_toolkit.core import DEFAULTCONFIG
 from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \
-    MixedPrecisionSearchManager
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     ResourceUtilization, RUTarget
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
@@ -29,9 +27,8 @@
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \
     BitWidthSearchMethod
 from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
-    mp_integer_programming_search
+    MixedPrecisionIntegerLPSolver
 from model_compression_toolkit.core.common.model_collector import ModelCollector
-from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
 from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \
     calculate_quantization_params
@@ -46,7 +43,6 @@
 from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \
     get_op_quantization_configs
 from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras
-from pulp import lpSum
 
 
 class MockReconstructionHelper:
@@ -90,38 +86,40 @@ def compute_resource_utilization_matrices(self):
 
 class TestLpSearchBitwidth(unittest.TestCase):
 
+    def _execute(self, mock_search_mgr, target_resource_utilization):
+        """ Run the integer LP solver on the mock manager's data, with RU constraints taken relative to its min_ru. """
+        sensitivity = mock_search_mgr.build_sensitivity_mapping()
+        ru_matrices = mock_search_mgr.compute_resource_utilization_matrices()
+        baseline_ru = mock_search_mgr.min_ru
+        restricted = target_resource_utilization.get_resource_utilization_dict(restricted_only=True)
+        return MixedPrecisionIntegerLPSolver(sensitivity, ru_matrices, {t: limit - baseline_ru[t] for t, limit in restricted.items()}).run()
+
     def test_search_weights_only(self):
         target_resource_utilization = ResourceUtilization(weights_memory=2)
         layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1),
                                    1: ResourceUtilization(weights_memory=2),
                                    0: ResourceUtilization(weights_memory=3)}}
         mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS})
-
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization)
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 1)
 
         target_resource_utilization = ResourceUtilization(weights_memory=0)  # Infeasible solution!
         with self.assertRaises(Exception):
-            bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                    target_resource_utilization=target_resource_utilization)
+            self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=ResourceUtilization(weights_memory=1000))
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 0)  # expecting for the maximal bit-width result
 
         target_resource_utilization = None  # target ResourceUtilization is not defined!
         with self.assertRaises(Exception):
-            mp_integer_programming_search(mock_search_manager,
-                                          target_resource_utilization=target_resource_utilization)
+            self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
         with self.assertRaises(Exception):
-            mp_integer_programming_search(mock_search_manager,
-                                          target_resource_utilization=ResourceUtilization(weights_memory=np.inf))
+            self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=np.inf))
 
     def test_search_activation_only(self):
         target_resource_utilization = ResourceUtilization(activation_memory=2)
@@ -130,20 +128,17 @@ def test_search_activation_only(self):
                                    0: ResourceUtilization(activation_memory=3)}}
         mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.ACTIVATION})
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 1)
 
         target_resource_utilization = ResourceUtilization(activation_memory=0)  # Infeasible solution!
         with self.assertRaises(Exception):
-            bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                    target_resource_utilization=target_resource_utilization)
+            bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=ResourceUtilization(
-                                                    activation_memory=1000))
+        bit_cfg = self._execute(mock_search_manager,
+                                target_resource_utilization=ResourceUtilization(activation_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 0)  # expecting for the maximal bit-width result
@@ -155,19 +150,16 @@ def test_search_weights_and_activation(self):
                                    0: ResourceUtilization(weights_memory=3, activation_memory=3)}}
         mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS, RUTarget.ACTIVATION})
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 1)
 
         target_resource_utilization = ResourceUtilization(weights_memory=0, activation_memory=0)  # Infeasible solution!
         with self.assertRaises(Exception):
-            bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                    target_resource_utilization=target_resource_utilization)
+            bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=ResourceUtilization(weights_memory=1000,
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000,
                                                                                                 activation_memory=1000))
 
         self.assertTrue(len(bit_cfg) == 1)
@@ -180,8 +172,7 @@ def test_search_total_resource_utilization(self):
                                    0: ResourceUtilization(weights_memory=3, activation_memory=3, total_memory=6)}}
         mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.TOTAL})
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 1)
@@ -193,8 +184,7 @@ def test_search_bops_ru(self):
                                    0: ResourceUtilization(bops=3)}}
         mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.BOPS})
 
-        bit_cfg = mp_integer_programming_search(mock_search_manager,
-                                                target_resource_utilization=target_resource_utilization)
+        bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization)
 
         self.assertTrue(len(bit_cfg) == 1)
         self.assertTrue(bit_cfg[0] == 1)

From ab69efbf1f6304bded657f1508a6c2c80b795e60 Mon Sep 17 00:00:00 2001
From: irenab <Irena.Byzalov@altair-semi.com>
Date: Sun, 23 Mar 2025 11:20:03 +0200
Subject: [PATCH 12/12] add missing type hints

---
 .../mixed_precision/mixed_precision_search_manager.py      | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
index 124e0e317..c878dccfb 100644
--- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
+++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
@@ -17,7 +17,7 @@
 
 from tqdm import tqdm
 
-from typing import Dict, List
+from typing import Dict, List, Tuple
 
 import numpy as np
 
@@ -199,7 +199,7 @@ def _build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, fl
 
         return layer_to_metrics_mapping
 
-    def _get_mp_graph(self, graph, target_resource_utilization):
+    def _get_mp_graph(self, graph: Graph, target_resource_utilization: ResourceUtilization) -> Tuple[Graph, bool]:
         """
         Get graph for mixed precision search. Virtual graph is built if bops is restricted and both activation and
         weights are configurable.
@@ -209,7 +209,8 @@ def _get_mp_graph(self, graph, target_resource_utilization):
             target_resource_utilization: target resource utilization.
 
         Returns:
-            Graph for mixed precision search (virtual or original).
+            Graph for mixed precision search (virtual or original), and a boolean flag indicating whether a virtual
+            graph has been constructed.
         """
         if (target_resource_utilization.bops_restricted() and
                 graph.has_any_configurable_activation() and