diff --git a/model_compression_toolkit/core/common/graph/base_graph.py b/model_compression_toolkit/core/common/graph/base_graph.py index cb54aac0e..7914559e6 100644 --- a/model_compression_toolkit/core/common/graph/base_graph.py +++ b/model_compression_toolkit/core/common/graph/base_graph.py @@ -706,14 +706,24 @@ def update_fused_nodes(self, fusion: List[Any]): """ self.fused_nodes.append(fusion) - def is_single_activation_cfg(self): + def has_any_configurable_activation(self) -> bool: """ - Checks whether all nodes in the graph that have activation quantization are quantized with the same bit-width. + Checks whether any node in the graph has a configurable activation quantization. - Returns: True if all quantization config candidates of all nodes have the same activation quantization bit-width. + Returns: + Whether any node in the graph has a configurable activation quantization. + """ + return any([n.has_configurable_activation() for n in self.nodes]) + + def has_any_configurable_weights(self): + """ + Checks whether any node in the graph has any configurable weights quantization. + Returns: + Whether any node in the graph has any configurable weights quantization. """ - return all([n.is_all_activation_candidates_equal() for n in self.nodes]) + + return any([n.has_any_configurable_weight() for n in self.nodes]) def replace_node(self, node_to_replace: BaseNode, new_node: BaseNode): """ diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py index 400cbb9e0..4bd9134bb 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -from typing import List, Set, Dict, Optional, Tuple, Any +from typing import List, Set, Dict, Tuple import numpy as np from model_compression_toolkit.core import FrameworkInfo -from model_compression_toolkit.core.common import Graph, BaseNode +from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ RUTarget @@ -36,42 +36,46 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple self.fw_impl = fw_impl self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info) - def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]: + def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]: """ - Compute utilization of requested targets for a specific configuration in the format expected by LP problem - formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order - (between calls). + Compute utilization of requested targets for a specific configuration: + for weights and bops - total utilization, + for activations and total - utilization per cut. Args: ru_targets: resource utilization targets to compute. mp_cfg: a list of candidates indices for configurable layers. Returns: - Dict of the computed utilization per target. + Dict of the computed utilization per target, as 1d vector. 
""" - - ru = {} - act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None) - if RUTarget.WEIGHTS in ru_targets: - wu = self._weights_utilization(w_qcs) - ru[RUTarget.WEIGHTS] = np.array(list(wu.values())) - - if RUTarget.ACTIVATION in ru_targets: - au = self._activation_utilization(act_qcs) - ru[RUTarget.ACTIVATION] = np.array(list(au.values())) - - if RUTarget.BOPS in ru_targets: - ru[RUTarget.BOPS] = self._bops_utilization(act_qcs=act_qcs, w_qcs=w_qcs) - - if RUTarget.TOTAL in ru_targets: - raise ValueError('Total target should be computed based on weights and activations targets.') - - assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.' - f'Requested {ru_targets}') - return ru + act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) + + ru, detailed_ru = self.ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, + BitwidthMode.QCustom, + act_qcs=act_qcs, + w_qcs=w_qcs, + ru_targets=ru_targets, + allow_unused_qcs=True, + return_detailed=True) + + ru_dict = {k: np.array([v]) for k, v in ru.get_resource_utilization_dict(restricted_only=True).items()} + # For activation and total we need utilization per cut, as different mp configurations might result in + # different cuts to be maximal. + for target in [RUTarget.ACTIVATION, RUTarget.TOTAL]: + if target in ru_dict: + ru_dict[target] = np.array(list(detailed_ru[target].values())) + + assert all(v.ndim == 1 for v in ru_dict.values()) + if RUTarget.ACTIVATION in ru_targets and RUTarget.TOTAL in ru_targets: + assert ru_dict[RUTarget.ACTIVATION].shape == ru_dict[RUTarget.TOTAL].shape + + assert len(ru_dict) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.' 
+ f'Requested {ru_targets}') + return ru_dict def get_quantization_candidates(self, mp_cfg) \ - -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]: + -> Tuple[Dict[str, NodeActivationQuantizationConfig], Dict[str, NodeWeightsQuantizationConfig]]: """ Retrieve quantization candidates objects for weights and activations from the configuration list. @@ -87,71 +91,3 @@ def get_quantization_candidates(self, mp_cfg) \ act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()} w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()} return act_qcs, w_qcs - - def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]: - """ - Compute weights utilization for configurable weights if configuration is passed, - or for non-configurable nodes otherwise. - - Args: - w_qcs: nodes quantization configuration to compute, or None. - - Returns: - Weight utilization per node. - """ - if w_qcs: - target_criterion = TargetInclusionCriterion.QConfigurable - bitwidth_mode = BitwidthMode.QCustom - else: - target_criterion = TargetInclusionCriterion.QNonConfigurable - bitwidth_mode = BitwidthMode.QDefaultSP - - _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=target_criterion, - bitwidth_mode=bitwidth_mode, - w_qcs=w_qcs) - nodes_util = {n: u.bytes for n, u in nodes_util.items()} - return nodes_util - - def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \ - -> Optional[Dict[Any, float]]: - """ - Compute activation utilization using MaxCut for all quantized nodes if configuration is passed. - - Args: - act_qcs: nodes activation configuration or None. - - Returns: - Activation utilization per cut, or empty dict if no configuration was passed. 
- """ - # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already - # covered by the computation of configurable activations. - if not act_qcs: - return {} - - _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut( - TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs) - cuts_util = {c: u.bytes for c, u in cuts_util.items()} - return cuts_util - - def _bops_utilization(self, - act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]], - w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> np.ndarray: - """ - Computes a resource utilization vector with the respective bit-operations (BOPS) count - according to the given mixed-precision configuration. - - Args: - act_qcs: nodes activation configuration or None. - w_qcs: nodes quantization configuration to compute, or None. - Either both are provided, or both are None. - - Returns: - A vector of node's BOPS count. - """ - assert [act_qcs, w_qcs].count(None) in [0, 2], 'act_qcs and w_qcs should both be provided or both be None.' - if act_qcs is None: - return np.array([]) - - _, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom, - act_qcs=act_qcs, w_qcs=w_qcs) - return np.array(list(detailed_bops.values())) diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py index 9a473cad0..4189cc37a 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py @@ -13,37 +13,27 @@ # limitations under the License. 
# ============================================================================== -import copy from enum import Enum -import numpy as np -from typing import List, Callable, Dict +from typing import List, Callable from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common import Graph -from model_compression_toolkit.core.common.hessian import HessianInfoService -from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager -from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ - mp_integer_programming_search from model_compression_toolkit.core.common.framework_info import FrameworkInfo +from model_compression_toolkit.core.common.hessian import HessianInfoService +from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \ + MixedPrecisionSearchManager +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \ greedy_solution_refinement_procedure -from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute -from model_compression_toolkit.logger import Logger class BitWidthSearchMethod(Enum): - # When adding a new search_methods MP configuration method, these enum and factory dictionary - # should be updated with it's kind and a search_method implementation. 
INTEGER_PROGRAMMING = 0 -search_methods = { - BitWidthSearchMethod.INTEGER_PROGRAMMING: mp_integer_programming_search} - - -def search_bit_width(graph_to_search_cfg: Graph, +def search_bit_width(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, target_resource_utilization: ResourceUtilization, @@ -60,7 +50,7 @@ def search_bit_width(graph_to_search_cfg: Graph, target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. Args: - graph_to_search_cfg: Graph to search a MP configuration for. + graph: Graph to search a MP configuration for. fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize). fw_impl: FrameworkImplementation object with specific framework methods implementation. target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. @@ -75,17 +65,7 @@ def search_bit_width(graph_to_search_cfg: Graph, bit-width index on the node). """ - - # target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. 
- if target_resource_utilization is None: - Logger.critical("Target ResourceUtilization is required for the bit-width search method's configuration.") # pragma: no cover - - # Set graph for MP search - graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching - if target_resource_utilization.bops_restricted(): - # TODO: we only need the virtual graph is both activations and weights are configurable - # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search - graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling()) + assert target_resource_utilization.is_any_restricted() # If we only run weights compression with MP than no need to consider activation quantization when computing the # MP metric (it adds noise to the computation) @@ -93,33 +73,28 @@ def search_bit_width(graph_to_search_cfg: Graph, weight_only_restricted = tru.weight_restricted() and not (tru.activation_restricted() or tru.total_mem_restricted() or tru.bops_restricted()) - disable_activation_for_metric = weight_only_restricted or graph_to_search_cfg.is_single_activation_cfg() + disable_activation_for_metric = weight_only_restricted or not graph.has_any_configurable_activation() # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph, # even if a virtual graph was created (and is used only for BOPS utilization computation purposes) se = fw_impl.get_sensitivity_evaluator( - graph_to_search_cfg, + graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info, disable_activation_for_metric=disable_activation_for_metric, hessian_info_service=hessian_info_service) - # Instantiate a manager object + if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING: + raise NotImplementedError() + + # Search manager and LP are highly coupled, so LP search method was moved inside search manager. 
search_manager = MixedPrecisionSearchManager(graph, fw_info, fw_impl, se, - target_resource_utilization, - original_graph=graph_to_search_cfg) - - if search_method not in search_methods: - raise NotImplementedError() # pragma: no cover - - search_method_fn = search_methods[search_method] - # Search for the desired mixed-precision configuration - result_bit_cfg = search_method_fn(search_manager, - target_resource_utilization) + target_resource_utilization) + result_bit_cfg = search_manager.search() if mp_config.refine_mp_solution: result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_resource_utilization) diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py index 862896197..c878dccfb 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py @@ -12,11 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +import copy +from collections import defaultdict -from typing import Callable, Dict, List +from tqdm import tqdm + +from typing import Dict, List, Tuple import numpy as np +from model_compression_toolkit.constants import EPS from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.framework_info import FrameworkInfo @@ -29,7 +34,10 @@ TargetInclusionCriterion, BitwidthMode from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \ MixedPrecisionRUHelper +from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ + MixedPrecisionIntegerLPSolver from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation +from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute from model_compression_toolkit.logger import Logger @@ -43,8 +51,7 @@ def __init__(self, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, sensitivity_evaluator: SensitivityEvaluation, - target_resource_utilization: ResourceUtilization, - original_graph: Graph = None): + target_resource_utilization: ResourceUtilization): """ Args: @@ -54,96 +61,208 @@ def __init__(self, sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of a bit-width configuration for the MP model. target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. - original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument - will contain the original graph (for config reconstruction purposes). 
""" - self.graph = graph - self.original_graph = graph if original_graph is None else original_graph self.fw_info = fw_info self.fw_impl = fw_impl + + self.original_graph = graph + # graph for mp search + self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization) + del graph # so that it's not used by mistake + self.sensitivity_evaluator = sensitivity_evaluator + self.target_resource_utilization = target_resource_utilization + + self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info) self.layer_to_bitwidth_mapping = self.get_search_space() - self.compute_metric_fn = self.get_sensitivity_metric() - self._cuts = None - # To define RU Total constraints we need to compute weights and activations even if they have no constraints - # TODO currently this logic is duplicated in linear_programming.py - targets = target_resource_utilization.get_restricted_targets() - if RUTarget.TOTAL in targets: - targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL} - self.ru_targets_to_compute = targets + self.ru_targets = target_resource_utilization.get_restricted_targets() + self.ru_helper = MixedPrecisionRUHelper(self.mp_graph, fw_info, fw_impl) - self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl) - self.target_resource_utilization = target_resource_utilization - self.min_ru_config = self.graph.get_min_candidates_config(fw_info) - self.max_ru_config = self.graph.get_max_candidates_config(fw_info) - self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config) - self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None) + self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info) + self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info) + self.min_ru = self.ru_helper.compute_utilization(self.ru_targets, self.min_ru_config) - self.config_reconstruction_helper = 
ConfigReconstructionHelper(virtual_graph=self.graph, + self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph, original_graph=self.original_graph) - def get_search_space(self) -> Dict[int, List[int]]: + def search(self) -> List[int]: """ - The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces - for the node). + Run mixed precision search. Returns: - The entire search space of the graph. + Indices of the selected bit-widths candidates. """ + candidates_sensitivity = self._build_sensitivity_mapping() + candidates_ru = self._compute_relative_ru_matrices() + rel_target_ru = self._get_relative_ru_constraint_per_mem_element() + solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, candidates_ru, rel_target_ru) + config = solver.run() - indices_mapping = {} - nodes_to_configure = self.graph.get_configurable_sorted_nodes(self.fw_info) - for idx, n in enumerate(nodes_to_configure): - # For each node, get all possible bitwidth indices for it - # (which is a list from 0 to the length of the candidates mp_config list of the node). - indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg))) # all search_methods space - return indices_mapping + if self.using_virtual_graph: + config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config) + return config - def get_sensitivity_metric(self) -> Callable: + def _get_relative_ru_constraint_per_mem_element(self) -> Dict[RUTarget, np.ndarray]: """ + Computes resource utilization constraint with respect to the minimal bit configuration, i.e. corresponding + constraint for each memory element is the relative utilization between the target utilization and + element's utilization for min-bit configuration. + + Returns: + A dictionary of relative resource utilization constraints per ru target. 
+ + Raises: + ValueError: if target resource utilization cannot be satisfied (utilization for the minimal bit + configuration exceeds the requested target utilization for any target). + """ + target_ru = self.target_resource_utilization.get_resource_utilization_dict(restricted_only=True) + rel_target_ru = { + ru_target: ru - self.min_ru[ru_target] for ru_target, ru in target_ru.items() + } + unsatisfiable_targets = { + ru_target.value: target_ru[ru_target] for ru_target, ru in rel_target_ru.items() if any(ru < 0) + } + if unsatisfiable_targets: + raise ValueError(f"The model cannot be quantized to meet the specified resource utilization for the " + f"following targets: {unsatisfiable_targets}") + return rel_target_ru + + def _build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]: + """ + This function measures the sensitivity of a change in a bitwidth of a layer on the entire model. + It builds a mapping from a node's index, to its bitwidth's effect on the model sensitivity. + For each node and some possible node's bitwidth (according to the given search space), we use + the framework function compute_metric_fn in order to infer + a batch of images, and compute (using the inference results) the sensitivity metric of + the configured mixed-precision model. + + Args: + eps: Epsilon value to manually increase metric value (if necessary) for numerical stability - Returns: Return a function (from the framework implementation) to compute a metric that - indicates the similarity of the mixed-precision model (to the float model) for a given - mixed-precision configuration. + Returns: + Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to + the sensitivity of the model. """ - # Get from the framework an evaluation function on how a MP configuration, - # affects the expected loss.
- return self.sensitivity_evaluator.compute_metric + Logger.info('Starting to evaluate metrics') + layer_to_metrics_mapping = {} + + compute_metric = self.sensitivity_evaluator.compute_metric + if self.using_virtual_graph: + origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph( + self.max_ru_config) + max_config_value = compute_metric(origin_max_config) + else: + max_config_value = compute_metric(self.max_ru_config) + + for node_idx, layer_possible_bitwidths_indices in tqdm(self.layer_to_bitwidth_mapping.items(), + total=len(self.layer_to_bitwidth_mapping)): + layer_to_metrics_mapping[node_idx] = {} + + for bitwidth_idx in layer_possible_bitwidths_indices: + if self.max_ru_config[node_idx] == bitwidth_idx: + # This is a computation of the metric for the max configuration, assign pre-calculated value + layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value + continue + + # Create a configuration that differs at one layer only from the baseline model + mp_model_configuration = self.max_ru_config.copy() + mp_model_configuration[node_idx] = bitwidth_idx + + # Build a distance matrix using the function we got from the framework implementation. 
+ if self.using_virtual_graph: + # Reconstructing original graph's configuration from virtual graph's configuration + origin_mp_model_configuration = \ + self.config_reconstruction_helper.reconstruct_config_from_virtual_graph( + mp_model_configuration, + changed_virtual_nodes_idx=[node_idx], + original_base_config=origin_max_config) + origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if + c != origin_mp_model_configuration[i]] + metric_value = compute_metric( + origin_mp_model_configuration, + origin_changed_nodes_indices, + origin_max_config) + else: + metric_value = compute_metric( + mp_model_configuration, + [node_idx], + self.max_ru_config) + + layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps) - def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray: + # Finalize distance metric mapping + self.finalize_distance_metric(layer_to_metrics_mapping) + + return layer_to_metrics_mapping + + def _get_mp_graph(self, graph: Graph, target_resource_utilization: ResourceUtilization) -> Tuple[Graph, bool]: """ - Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization. - Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0. + Get graph for mixed precision search. Virtual graph is built if bops is restricted and both activation and + weights are configurable. Args: - target: The resource target for which the resource utilization is calculated (a RUTarget value). + graph: input graph. + target_resource_utilization: target resource utilization. + + Returns: + Graph for mixed precision search (virtual or original), and a boolean flag whether a virtual graph has been + constructed. 
+ """ + if (target_resource_utilization.bops_restricted() and + graph.has_any_configurable_activation() and + graph.has_any_configurable_weights()): + mp_graph = substitute(copy.deepcopy(graph), + self.fw_impl.get_substitutions_virtual_weights_activation_coupling()) + return mp_graph, True + + return graph, False + + def get_search_space(self) -> Dict[int, List[int]]: + """ + The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces + for the node). Returns: - A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements - depends on the target, e.g. num nodes or num cuts, for which utilization is computed. + The entire search space of the graph. """ - assert isinstance(target, RUTarget), f"{target} is not a valid resource target" - configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info) + indices_mapping = {} + for idx, n in enumerate(self.mp_topo_configurable_nodes): + # For each node, get all possible bitwidth indices for it + # (which is a list from 0 to the length of the candidates mp_config list of the node). + indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg))) # all search_methods space + return indices_mapping + + def _compute_relative_ru_matrices(self) -> Dict[RUTarget, np.ndarray]: + """ + Computes and builds a resource utilization matrix for all restricted targets, to be used for the + mixed-precision search problem formalization. + Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0. - ru_matrix = [] - for c, c_n in enumerate(configurable_sorted_nodes): + Returns: + A dictionary containing resource utilization matrix of shape (num configurations, num memory elements) + per ru target. Num memory elements depends on the target, e.g. num cuts or 1 for cumulative metrics. 
+ """ + rus_per_candidate = defaultdict(list) + for c, c_n in enumerate(self.mp_topo_configurable_nodes): for candidate_idx in range(len(c_n.candidates_quantization_cfg)): if candidate_idx == self.min_ru_config[c]: - candidate_rus = self.min_ru[target] + candidate_rus = self.min_ru else: - candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) + candidate_rus = self.compute_ru_for_candidate(c, candidate_idx) - ru_matrix.append(np.asarray(candidate_rus)) + for target, ru in candidate_rus.items(): + rus_per_candidate[target].append(ru) - np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements - return np_ru_matrix + # Each target contains a matrix of num configurations X num elements + relative_rus = {target: np.array(ru) - self.min_ru[target] for target, ru in rus_per_candidate.items()} + return relative_rus - def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray: + def compute_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int) -> Dict[RUTarget, np.ndarray]: """ Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal target configuration with the given candidate index. @@ -151,13 +270,13 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, Args: conf_node_idx: The index of a node in a sorted configurable nodes list. candidate_idx: Quantization config candidate to be used for the node's resource utilization computation. - target: The target for which the resource utilization is calculated (a RUTarget value). - Returns: Node's resource utilization vector. + Returns: + Node's resource utilization vector. 
""" cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx) - return self.ru_helper.compute_utilization({target}, cfg)[target] + return self.ru_helper.compute_utilization(self.ru_targets, cfg) @staticmethod def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]: @@ -191,7 +310,7 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config) ru = self.ru_helper.ru_calculator.compute_resource_utilization( target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs, - w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute, allow_unused_qcs=True) + w_qcs=w_qcs, ru_targets=self.ru_targets, allow_unused_qcs=True) return ru def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]): diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py index d2746da1b..afb03f06a 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py @@ -51,25 +51,34 @@ class ResourceUtilization: bops: float = np.inf def weight_restricted(self): - return self.weights_memory < np.inf + return self._is_restricted(self.weights_memory) def activation_restricted(self): - return self.activation_memory < np.inf + return self._is_restricted(self.activation_memory) def total_mem_restricted(self): - return self.total_memory < np.inf + return self._is_restricted(self.total_memory) def bops_restricted(self): - return self.bops < np.inf + return self._is_restricted(self.bops) - def get_resource_utilization_dict(self) -> Dict[RUTarget, float]: + def 
get_resource_utilization_dict(self, restricted_only: bool = False) -> Dict[RUTarget, float]: """ - Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target. + Get resource utilization as a dictionary. + + Args: + restricted_only: whether to include only targets with restricted utilization. + + Returns: + A dictionary containing the resource utilization with targets as keys. """ - return {RUTarget.WEIGHTS: self.weights_memory, - RUTarget.ACTIVATION: self.activation_memory, - RUTarget.TOTAL: self.total_memory, - RUTarget.BOPS: self.bops} + ru_dict = {RUTarget.WEIGHTS: self.weights_memory, + RUTarget.ACTIVATION: self.activation_memory, + RUTarget.TOTAL: self.total_memory, + RUTarget.BOPS: self.bops} + if restricted_only: + ru_dict = {k: v for k, v in ru_dict.items() if self._is_restricted(v)} + return ru_dict def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool: """ @@ -114,3 +123,6 @@ def get_summary_str(self, restricted: bool): if RUTarget.BOPS in targets: summary.append(f"BOPS: {self.bops}") return ', '.join(summary) + + def _is_restricted(self, v): + return v < np.inf diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py index 408e5a598..07f350d53 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py @@ -431,8 +431,7 @@ def compute_node_activation_tensor_utilization(self, Returns: Node's activation utilization. 
""" - if qc and bitwidth_mode != BitwidthMode.QCustom: - raise ValueError(self.unexpected_qc_error) + self._validate_custom_qcs(qc, bitwidth_mode) if target_criterion: # only check whether the node meets the criterion @@ -470,9 +469,6 @@ def compute_bops(self, - Total BOPS count of the network. - Detailed BOPS count per node. """ - self._validate_custom_qcs(act_qcs, bitwidth_mode) - self._validate_custom_qcs(w_qcs, bitwidth_mode) - nodes_bops = {} for n in self.graph.get_topo_sorted_nodes(): w_qc = w_qcs.get(n.name) if w_qcs else None diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py index 1a3b2102c..4e5155ad4 100644 --- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py @@ -12,326 +12,146 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== - import numpy as np from pulp import * -from tqdm import tqdm -from typing import Dict, Tuple, Any, Optional +from typing import Dict, Tuple -from model_compression_toolkit.logger import Logger -from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget # Limit ILP solver runtime in seconds SOLVER_TIME_LIMIT = 60 -def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization = None) -> np.ndarray: - """ - Searching and returning a mixed-precision configuration using an ILP optimization solution. - It first builds a mapping from each layer's index (in the model) to a dictionary that maps the - bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. - Then, it creates a mapping from each node's index (in the graph) to a dictionary - that maps the bitwidth index to the contribution of configuring this node with this - bitwidth to the minimal possible resource utilization of the model. - Then, and using these mappings, it builds an LP problem and finds an optimal solution. - If a solution could not be found, exception is thrown. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model' weights memory - consumption). - - Returns: - The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node). 
+class MixedPrecisionIntegerLPSolver: + """ Integer Linear Programming solver for Mixed Precision. + Args: + layer_to_sensitivity_mapping: sensitivity per candidate per layer. + candidates_ru: resource utilization per candidate. + ru_constraints: resource utilization constraints corresponding to 'candidates_ru'. """ - - # Build a mapping from each layer's index (in the model) to a dictionary that maps the - # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. - - if target_resource_utilization is None or search_manager is None: - Logger.critical("Invalid parameters: 'target_resource_utilization' and 'search_manager' must not be 'None' " - "for mixed-precision search. Ensure valid inputs are provided.") - - layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization) - - # Init variables to find their values when solving the lp problem. - layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping) - - # Add all equations and inequalities that define the problem. - lp_problem = _formalize_problem(layer_to_indicator_vars_mapping, - layer_to_metrics_mapping, - layer_to_objective_vars_mapping, - target_resource_utilization, - search_manager) - - # Use default PULP solver. Limit runtime in seconds - solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT) - lp_problem.solve(solver=solver) # Try to solve the problem. - - assert lp_problem.status == LpStatusOptimal, Logger.critical( - "No solution was found during solving the LP problem") - Logger.info(f"ILP status: {LpStatus[lp_problem.status]}") - - # Take the bitwidth index only if its corresponding indicator is one. 
- config = np.asarray( - [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for - nbits_to_indicator - in layer_to_indicator_vars_mapping.values()] - ).flatten() - - if target_resource_utilization.bops_restricted(): - return search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config) - else: - return config - - -def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[ - Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]: - """ - Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use, - and a variable for each indicator for whether we use the former variable or not. - - Args: - layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the - bitwidth index to the observed sensitivity of the model. - - Returns: - A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem, - and the second for indicators for each variable. 
- """ - - layer_to_indicator_vars_mapping = dict() - layer_to_objective_vars_mapping = dict() - - for layer, nbits_to_metric in layer_to_metrics_mapping.items(): - layer_to_indicator_vars_mapping[layer] = dict() - - for nbits in nbits_to_metric.keys(): - layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}", - lowBound=0, - upBound=1, - cat=LpInteger) - - layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0) - - return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping - - -def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVariable]], - layer_to_metrics_mapping: Dict[int, Dict[int, float]], - layer_to_objective_vars_mapping: Dict[int, LpVariable], - target_resource_utilization: ResourceUtilization, - search_manager: MixedPrecisionSearchManager) -> LpProblem: - """ - Formalize the LP problem by defining all inequalities that define the solution space. - - Args: - layer_to_indicator_vars_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to - indicator variable. - layer_to_metrics_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to sensitivity - evaluation. - layer_to_objective_vars_mapping: Dictionary that maps each node's index to a bitwidth variable we find its - value. - target_resource_utilization: Target resource utilization to reduce our feasible solution space. - search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. - - Returns: - The formalized LP problem. - """ - - lp_problem = LpProblem() # minimization problem by default - lp_problem += lpSum([layer_to_objective_vars_mapping[layer] for layer in - layer_to_metrics_mapping.keys()]) # Objective (minimize acc loss) - - for layer in layer_to_metrics_mapping.keys(): - # Use every bitwidth for every layer with its indicator. 
- lp_problem += lpSum([indicator * layer_to_metrics_mapping[layer][nbits] - for nbits, indicator in layer_to_indicator_vars_mapping[layer].items()]) == \ - layer_to_objective_vars_mapping[layer] - - # Constraint of only one indicator==1 - lp_problem += lpSum( - [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1 - - # Bound the feasible solution space with the desired resource utilization values. - # Creates separate constraints for weights utilization and activation utilization. - if target_resource_utilization is not None: + def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]], + candidates_ru: Dict[RUTarget, np.ndarray], + ru_constraints: Dict[RUTarget, np.ndarray]): + self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping + self.candidates_ru = candidates_ru + self.ru_constraints = ru_constraints + + self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = ( + self._init_problem_vars(layer_to_sensitivity_mapping)) + + def run(self) -> List[int]: + """ + Build and solve an ILP optimization problem. + + Returns: + The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node). + + """ + # Add all equations and inequalities that define the problem. + lp_problem = self._formalize_problem() + + # Use default PULP solver. Limit runtime in seconds + solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT) + lp_problem.solve(solver=solver) # Try to solve the problem. + + if lp_problem.status != LpStatusOptimal: + raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}') + + # Take the bitwidth index only if its corresponding indicator is one. 
+ config = np.asarray( + [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for + nbits_to_indicator + in self.layer_to_indicator_vars_mapping.values()] + ).flatten() + + return config.tolist() + + @staticmethod + def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[ + Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]: + """ + Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use, + and a variable for each indicator for whether we use the former variable or not. + + Args: + layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the + bitwidth index to the observed sensitivity of the model. + + Returns: + A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem, + and the second for indicators for each variable. + """ + + layer_to_indicator_vars_mapping = dict() + layer_to_objective_vars_mapping = dict() + + for layer, nbits_to_metric in layer_to_metrics_mapping.items(): + layer_to_indicator_vars_mapping[layer] = dict() + + for nbits in nbits_to_metric.keys(): + layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}", + lowBound=0, + upBound=1, + cat=LpInteger) + + layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0) + + return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping + + def _formalize_problem(self) -> LpProblem: + """ + Formalize the LP problem by defining all inequalities that define the solution space. + + Returns: + The formalized LP problem. + """ + + lp_problem = LpProblem() # minimization problem by default + lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in + self.layer_to_sensitivity_mapping.keys()]) # Objective (minimize acc loss) + + for layer in self.layer_to_sensitivity_mapping.keys(): + # Use every bitwidth for every layer with its indicator. 
+ lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits] + for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \ + self.layer_to_objective_vars_mapping[layer] + + # Constraint of only one indicator==1 + lp_problem += lpSum( + [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1 + + # Bound the feasible solution space with the desired resource utilization values. + self._add_ru_constraints(lp_problem=lp_problem) + + return lp_problem + + def _add_ru_constraints(self, lp_problem: LpProblem): + """ + Adding targets constraints for the Lp problem for the given target resource utilization. + The update to the Lp problem object is done inplace. + + Args: + lp_problem: An Lp problem object to add constraint to. + """ indicators = [] - for layer in layer_to_metrics_mapping.keys(): - for _, indicator in layer_to_indicator_vars_mapping[layer].items(): - indicators.append(indicator) - - indicators_arr = np.array(indicators) - indicators_matrix = np.diag(indicators_arr) - - _add_ru_constraints(search_manager=search_manager, - target_resource_utilization=target_resource_utilization, - indicators_matrix=indicators_matrix, - lp_problem=lp_problem, - non_conf_ru_dict=search_manager.non_conf_ru_dict) - else: # pragma: no cover - Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. " - "A valid 'target_resource_utilization' is required.") - return lp_problem - - -def _add_ru_constraints(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization, - indicators_matrix: np.ndarray, - lp_problem: LpProblem, - non_conf_ru_dict: Dict[RUTarget, np.ndarray]): - """ - Adding targets constraints for the Lp problem for the given target resource utilization. - The update to the Lp problem object is done inplace. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. 
- target_resource_utilization: Target resource utilization. - indicators_matrix: A diagonal matrix of the Lp problem's indicators. - lp_problem: An Lp problem object to add constraint to. - non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets. - """ - ru_indicated_vectors = {} - # targets to add constraints for - constraints_targets = target_resource_utilization.get_restricted_targets() - # to add constraints for Total target we need to compute weight and activation - targets_to_compute = constraints_targets - if RUTarget.TOTAL in constraints_targets: - targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL} - - for target in targets_to_compute: - ru_matrix = search_manager.compute_resource_utilization_matrix(target) # num elements X num configurations - indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix) # num elements X num configurations - - # Sum the indicated values over all configurations, and add the value for minimal configuration once. - # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory - # that would be required if that configuration is selected). - # Each element in a vector is an lp object representing the configurations sum term for a memory element. - ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target] - - non_conf_ru_vec = non_conf_ru_dict[target] - if non_conf_ru_vec is not None and non_conf_ru_vec.size: - # add non-conf value as additional mem elements so that they get aggregated - ru_vec = np.concatenate([ru_vec, non_conf_ru_vec]) - ru_indicated_vectors[target] = ru_vec - - # Add constraints only for the restricted targets in target resource utilization. - # Adding activation constraints modifies the lp term in ru_indicated_vectors, so if both activation and total - # are restricted we first add the constraints for total. 
- if RUTarget.TOTAL in constraints_targets and RUTarget.ACTIVATION in constraints_targets: - constraints_targets.remove(RUTarget.ACTIVATION) - constraints_targets = list(constraints_targets) + [RUTarget.ACTIVATION] - for target in constraints_targets: - target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target] - aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target) - for v in aggr_ru: - if isinstance(v, float): - if v > target_resource_utilization_value: - Logger.critical( - f"The model cannot be quantized to meet the specified target resource utilization {target.value} " - f"with the value {target_resource_utilization_value}.") # pragma: no cover - else: - lp_problem += v <= target_resource_utilization_value - - -def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list: - """ - Aggregate resource utilization values for the LP. - - Args: - targets_ru_vec: resource utilization vectors for all precomputed targets. - target: resource utilization target. - - Returns: - Aggregated resource utilization. - """ - if target == RUTarget.TOTAL: - w = lpSum(targets_ru_vec[RUTarget.WEIGHTS]) - act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION] - return [w + v for v in act_ru_vec] - - if target in [RUTarget.WEIGHTS, RUTarget.BOPS]: - return [lpSum(targets_ru_vec[target])] - - if target == RUTarget.ACTIVATION: - # for max aggregation, each value constitutes a separate constraint - return list(targets_ru_vec[target]) - - raise ValueError(f'Unexpected target {target}.') # pragma: no cover - - -def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization, - eps: float = EPS) -> Dict[int, Dict[int, float]]: - """ - This function measures the sensitivity of a change in a bitwidth of a layer on the entire model. - It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity. 
- For each node and some possible node's bitwidth (according to the given search space), we use - the framework function compute_metric_fn in order to infer - a batch of images, and compute (using the inference results) the sensitivity metric of - the configured mixed-precision model. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations - (like model' weights memory consumption). - eps: Epsilon value to manually increase metric value (if necessary) for numerical stability - - Returns: - Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to - the sensitivity of the model. - - """ - - Logger.info('Starting to evaluate metrics') - layer_to_metrics_mapping = {} - - if target_resource_utilization.bops_restricted(): - origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config) - max_config_value = search_manager.compute_metric_fn(origin_max_config) - else: - max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config) - - for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(), - total=len(search_manager.layer_to_bitwidth_mapping)): - layer_to_metrics_mapping[node_idx] = {} - - for bitwidth_idx in layer_possible_bitwidths_indices: - if search_manager.max_ru_config[node_idx] == bitwidth_idx: - # This is a computation of the metric for the max configuration, assign pre-calculated value - layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value - continue - - # Create a configuration that differs at one layer only from the baseline model - mp_model_configuration = search_manager.max_ru_config.copy() - mp_model_configuration[node_idx] = bitwidth_idx - - # Build a distance matrix using the function we got from the framework 
implementation. - if target_resource_utilization.bops_restricted(): - # Reconstructing original graph's configuration from virtual graph's configuration - origin_mp_model_configuration = \ - search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph( - mp_model_configuration, - changed_virtual_nodes_idx=[node_idx], - original_base_config=origin_max_config) - origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if - c != origin_mp_model_configuration[i]] - metric_value = search_manager.compute_metric_fn( - origin_mp_model_configuration, - origin_changed_nodes_indices, - origin_max_config) - else: - metric_value = search_manager.compute_metric_fn( - mp_model_configuration, - [node_idx], - search_manager.max_ru_config) - - layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps) - - # Finalize distance metric mapping - search_manager.finalize_distance_metric(layer_to_metrics_mapping) - - return layer_to_metrics_mapping + for layer in self.layer_to_sensitivity_mapping: + indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values())) + indicators_vec = np.array(indicators) + + for target, ru_matrix in self.candidates_ru.items(): + # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall + # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration) + assert ru_matrix.ndim == 2 + if target in [RUTarget.WEIGHTS, RUTarget.BOPS]: + assert ru_matrix.shape[1] == 1 + + indicated_ru_matrix = ru_matrix.T * indicators_vec + # build lp sum term over all candidates + ru_vec = indicated_ru_matrix.sum(axis=1) + + # For cumulative metrics a single constraint is added, for max metrics a separate constraint + # is added for each memory element (each element < target => max element < target). 
+ assert len(ru_vec) == len(self.ru_constraints[target]) + for v, c in zip(ru_vec, self.ru_constraints[target]): + lp_problem += v <= c diff --git a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py index 16c49ad53..7277d662e 100644 --- a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +++ b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py @@ -63,7 +63,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int], # layer has max config in the given solution, nothing to optimize continue - current_node = search_manager.graph.get_configurable_sorted_nodes(search_manager.fw_info)[node_idx] + current_node = search_manager.mp_topo_configurable_nodes[node_idx] node_candidates = current_node.candidates_quantization_cfg # only weights kernel attribute is quantized with weights mixed precision diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py index d9bbb6b8b..60b728065 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py @@ -308,7 +308,7 @@ def run_test(self, **kwargs): def compare(self, qat_ready_model, quantization_info): - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_mp_cfg)) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_mp_cfg) # check that quantizer gets multiple bits configuration for layer in qat_ready_model.layers: diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index 252a38d9a..5e4c2401a 100644 --- 
a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -259,7 +259,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [1, 1]).all() + assert quantization_info.mixed_precision_cfg == [1, 1] for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16) @@ -300,7 +300,7 @@ def create_networks(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - self.unit_test.assertTrue((quantization_info.mixed_precision_cfg != 0).any()) + self.unit_test.assertTrue(any(i for i in quantization_info.mixed_precision_cfg)) for i in range(32): # quantized per channel self.unit_test.assertTrue( @@ -325,7 +325,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [2, 2]).all() + assert quantization_info.mixed_precision_cfg == [2, 2] for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 4) @@ -443,7 +443,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [0, 1]).all() + assert quantization_info.mixed_precision_cfg == [0, 1] 
for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) @@ -466,8 +466,8 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert any([(quantization_info.mixed_precision_cfg == [1, 0]).all(), - (quantization_info.mixed_precision_cfg == [0, 1]).all()]) + assert any([quantization_info.mixed_precision_cfg == [1, 0], + quantization_info.mixed_precision_cfg == [0, 1]]) for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) diff --git a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py index 36de90950..b9c94bde1 100644 --- a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py +++ b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +from unittest.mock import Mock + import numpy as np import unittest @@ -25,9 +27,8 @@ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \ BitWidthSearchMethod from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ - mp_integer_programming_search + MixedPrecisionIntegerLPSolver from model_compression_toolkit.core.common.model_collector import ModelCollector -from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig from model_compression_toolkit.core.common.quantization.core_config import CoreConfig from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \ calculate_quantization_params @@ -42,7 +43,6 @@ from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ get_op_quantization_configs from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras -from pulp import lpSum class MockReconstructionHelper: @@ -57,144 +57,122 @@ def reconstruct_config_from_virtual_graph(self, class MockMixedPrecisionSearchManager: - def __init__(self, layer_to_ru_mapping): + def __init__(self, layer_to_ru_mapping, ru_targets): + self.ru_targets = ru_targets self.layer_to_bitwidth_mapping = {0: [0, 1, 2]} self.layer_to_ru_mapping = layer_to_ru_mapping - self.compute_metric_fn = lambda x, y=None, z=None: {0: 2, 1: 1, 2: 0}[x[0]] - self.min_ru = {RUTarget.WEIGHTS: [1], - RUTarget.ACTIVATION: [1], - RUTarget.BOPS: [1]} # minimal resource utilization in the tests layer_to_ru_mapping + self.min_ru = {t: np.array([1]) for t in ru_targets} # minimal resource utilization in the tests layer_to_ru_mapping self.max_ru_config = [0] self.config_reconstruction_helper = MockReconstructionHelper() - self.non_conf_ru_dict = {RUTarget.WEIGHTS: None, RUTarget.ACTIVATION: None, 
RUTarget.BOPS: None} - def compute_resource_utilization_matrix(self, target): - # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test) - if target == RUTarget.WEIGHTS: - ru_matrix = [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - elif target == RUTarget.ACTIVATION: - ru_matrix = [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - elif target == RUTarget.BOPS: - ru_matrix = [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - else: - raise ValueError('Not supposed to get here') - return np.array(ru_matrix).T + def build_sensitivity_mapping(self): + return {0: {0: 0, 1: 1, 2: 2}} - def finalize_distance_metric(self, d): - return d + def compute_resource_utilization_matrices(self): + # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test) + ru = { + RUTarget.WEIGHTS: + [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.ACTIVATION: + [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.BOPS: + [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.TOTAL: + [np.flip(np.array([ru.total_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] + } + return {k: np.array(v).T for k, v in ru.items() if k in self.ru_targets} class TestLpSearchBitwidth(unittest.TestCase): + def _execute(self, mock_search_mgr, target_resource_utilization): + candidates_sensitivity = mock_search_mgr.build_sensitivity_mapping() + candidates_ru = mock_search_mgr.compute_resource_utilization_matrices() + min_ru = mock_search_mgr.min_ru + ru_constraints = {k: v - min_ru[k] for k, v in target_resource_utilization.get_resource_utilization_dict(restricted_only=True).items()} + lp_solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, 
candidates_ru, ru_constraints) + return lp_solver.run() + def test_search_weights_only(self): target_resource_utilization = ResourceUtilization(weights_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1), 1: ResourceUtilization(weights_memory=2), 0: ResourceUtilization(weights_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) - - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS}) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(weights_memory=0) # Infeasible solution! with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization(weights_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result target_resource_utilization = None # target ResourceUtilization is not defined! 
with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - def test_search_weights_only_with_non_conf(self): - target_resource_utilization = ResourceUtilization(weights_memory=2+11) - layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1), - 1: ResourceUtilization(weights_memory=2), - 0: ResourceUtilization(weights_memory=3)} - } - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) - mock_search_manager.non_conf_ru_dict = {RUTarget.WEIGHTS: np.array([5, 6])} - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) - - self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 1) - - # make sure non_conf was taken into account and lower target has a different solution - target_resource_utilization = ResourceUtilization(weights_memory=2 + 10.9) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) - self.assertFalse(bit_cfg[0] == 1) + with self.assertRaises(Exception): + self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=np.inf)) def test_search_activation_only(self): target_resource_utilization = ResourceUtilization(activation_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(activation_memory=1), 1: ResourceUtilization(activation_memory=2), 0: ResourceUtilization(activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.ACTIVATION}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, 
target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(activation_memory=0) # Infeasible solution! with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization( - activation_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, + target_resource_utilization=ResourceUtilization(activation_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result def test_search_weights_and_activation(self): target_resource_utilization = ResourceUtilization(weights_memory=2, activation_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), 1: ResourceUtilization(weights_memory=2, activation_memory=2), 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS, RUTarget.ACTIVATION}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(weights_memory=0, activation_memory=0) # Infeasible solution! 
with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization(weights_memory=np.inf, - activation_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000, + activation_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result def test_search_total_resource_utilization(self): target_resource_utilization = ResourceUtilization(total_memory=4) - layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), - 1: ResourceUtilization(weights_memory=2, activation_memory=2), - 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1, total_memory=2), + 1: ResourceUtilization(weights_memory=2, activation_memory=2, total_memory=4), + 0: ResourceUtilization(weights_memory=3, activation_memory=3, total_memory=6)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.TOTAL}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) @@ -204,10 +182,9 @@ def test_search_bops_ru(self): layer_to_ru_mapping = {0: {2: ResourceUtilization(bops=1), 1: 
ResourceUtilization(bops=2), 0: ResourceUtilization(bops=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.BOPS}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) @@ -272,32 +249,14 @@ def representative_data_gen(): representative_data_gen, fw_info=fw_info) - cfg = search_bit_width(graph_to_search_cfg=graph, + cfg = search_bit_width(graph=graph, fw_info=DEFAULT_KERAS_INFO, fw_impl=keras_impl, - target_resource_utilization=ResourceUtilization(np.inf), + target_resource_utilization=ResourceUtilization(weights_memory=100), mp_config=core_config.mixed_precision_config, representative_data_gen=representative_data_gen, search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) - with self.assertRaises(Exception): - cfg = search_bit_width(graph_to_search_cfg=graph, - fw_info=DEFAULT_KERAS_INFO, - fw_impl=keras_impl, - target_resource_utilization=ResourceUtilization(np.inf), - mp_config=core_config.mixed_precision_config, - representative_data_gen=representative_data_gen, - search_method=None) - - with self.assertRaises(Exception): - cfg = search_bit_width(graph_to_search_cfg=graph, - fw_info=DEFAULT_KERAS_INFO, - fw_impl=keras_impl, - target_resource_utilization=None, - mp_config=core_config.mixed_precision_config, - representative_data_gen=representative_data_gen, - search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) - def test_mixed_precision_search_facade(self): core_config_avg_weights = CoreConfig(quantization_config=DEFAULTCONFIG, mixed_precision_config=MixedPrecisionQuantizationConfig(compute_mse, diff --git a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py 
b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py index 120ef70a9..7c830a4b6 100644 --- a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py +++ b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py @@ -162,7 +162,8 @@ def plot_tensor_sizes(self, core_config): fqc=fqc, network_editor=[], quant_config=cfg, - target_resource_utilization=mct.core.ResourceUtilization(), + target_resource_utilization=mct.core.ResourceUtilization(weights_memory=73, + activation_memory=191), n_iter=1, analyze_similarity=True, mp_cfg=mp_cfg) diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py index dfc9edb13..9e2fbf6d0 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py @@ -72,7 +72,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info: raise NotImplementedError def verify_config(self, result_config, expected_config): - self.unit_test.assertTrue(all(result_config == expected_config), + self.unit_test.assertTrue(result_config == expected_config, f"Configuration mismatch: expected {expected_config} but got {result_config}.") diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py index f09fb5b53..1dd065fe0 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py @@ -335,7 +335,7 @@ def get_resource_utilization(self): return ResourceUtilization(80) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_config)) + 
self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_config) class MixedPrecisionWeightsTestNet(torch.nn.Module): diff --git a/tests/pytorch_tests/model_tests/feature_models/qat_test.py b/tests/pytorch_tests/model_tests/feature_models/qat_test.py index 73154073c..e650feb44 100644 --- a/tests/pytorch_tests/model_tests/feature_models/qat_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/qat_test.py @@ -289,7 +289,7 @@ def run_test(self): input_x=self.representative_data_gen(), quantization_info=quantization_info) - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0])) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0]) # check that quantizer gets multiple bits configuration for _, layer in qat_ready_model.named_children(): @@ -336,7 +336,7 @@ def run_test(self): quantization_info=quantization_info) # check that MP search doesn't return 8 bits configuration for all layers - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0])) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0]) # check that quantizer gets multiple bits configuration for _, layer in qat_ready_model.named_children(): diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py index 5a5fbeee5..7d29842af 100644 --- a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py +++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py @@ -25,7 +25,7 @@ @pytest.fixture def search_manager(): manager = Mock() - manager.graph.get_configurable_sorted_nodes = MagicMock() + manager.mp_topo_configurable_nodes = MagicMock() manager.fw_info.get_kernel_op_attributes = MagicMock() manager.replace_config_in_index = MagicMock( 
side_effect=lambda config, idx, candidate: ( @@ -105,7 +105,7 @@ def test_greedy_solution_refinement_procedure( node_mock = Mock() node_mock.candidates_quantization_cfg = candidate_configs(weight_bits_dict_0, act_bits_0, weight_bits_dict_1, act_bits_1) - search_manager.graph.get_configurable_sorted_nodes.return_value = [node_mock] + search_manager.mp_topo_configurable_nodes = [node_mock] search_manager.compute_resource_utilization_for_config = MagicMock(side_effect=lambda config: { 0: ResourceUtilization(**alternative_candidate_resources_usage),