diff --git a/model_compression_toolkit/core/common/graph/base_graph.py b/model_compression_toolkit/core/common/graph/base_graph.py index cb54aac0e..7914559e6 100644 --- a/model_compression_toolkit/core/common/graph/base_graph.py +++ b/model_compression_toolkit/core/common/graph/base_graph.py @@ -706,14 +706,24 @@ def update_fused_nodes(self, fusion: List[Any]): """ self.fused_nodes.append(fusion) - def is_single_activation_cfg(self): + def has_any_configurable_activation(self) -> bool: """ - Checks whether all nodes in the graph that have activation quantization are quantized with the same bit-width. + Checks whether any node in the graph has a configurable activation quantization. - Returns: True if all quantization config candidates of all nodes have the same activation quantization bit-width. + Returns: + Whether any node in the graph has a configurable activation quantization. + """ + return any([n.has_configurable_activation() for n in self.nodes]) + + def has_any_configurable_weights(self): + """ + Checks whether any node in the graph has any configurable weights quantization. + Returns: + Whether any node in the graph has any configurable weights quantization. """ - return all([n.is_all_activation_candidates_equal() for n in self.nodes]) + + return any([n.has_any_configurable_weight() for n in self.nodes]) def replace_node(self, node_to_replace: BaseNode, new_node: BaseNode): """ diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py index 400cbb9e0..4bd9134bb 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -from typing import List, Set, Dict, Optional, Tuple, Any +from typing import List, Set, Dict, Tuple import numpy as np from model_compression_toolkit.core import FrameworkInfo -from model_compression_toolkit.core.common import Graph, BaseNode +from model_compression_toolkit.core.common import Graph from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ RUTarget @@ -36,42 +36,46 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple self.fw_impl = fw_impl self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info) - def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Optional[List[int]]) -> Dict[RUTarget, np.ndarray]: + def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: List[int]) -> Dict[RUTarget, np.ndarray]: """ - Compute utilization of requested targets for a specific configuration in the format expected by LP problem - formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order - (between calls). + Compute utilization of requested targets for a specific configuration: + for weights and bops - total utilization, + for activations and total - utilization per cut. Args: ru_targets: resource utilization targets to compute. mp_cfg: a list of candidates indices for configurable layers. Returns: - Dict of the computed utilization per target. + Dict of the computed utilization per target, as 1d vector. 
""" - - ru = {} - act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) if mp_cfg else (None, None) - if RUTarget.WEIGHTS in ru_targets: - wu = self._weights_utilization(w_qcs) - ru[RUTarget.WEIGHTS] = np.array(list(wu.values())) - - if RUTarget.ACTIVATION in ru_targets: - au = self._activation_utilization(act_qcs) - ru[RUTarget.ACTIVATION] = np.array(list(au.values())) - - if RUTarget.BOPS in ru_targets: - ru[RUTarget.BOPS] = self._bops_utilization(act_qcs=act_qcs, w_qcs=w_qcs) - - if RUTarget.TOTAL in ru_targets: - raise ValueError('Total target should be computed based on weights and activations targets.') - - assert len(ru) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.' - f'Requested {ru_targets}') - return ru + act_qcs, w_qcs = self.get_quantization_candidates(mp_cfg) + + ru, detailed_ru = self.ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantized, + BitwidthMode.QCustom, + act_qcs=act_qcs, + w_qcs=w_qcs, + ru_targets=ru_targets, + allow_unused_qcs=True, + return_detailed=True) + + ru_dict = {k: np.array([v]) for k, v in ru.get_resource_utilization_dict(restricted_only=True).items()} + # For activation and total we need utilization per cut, as different mp configurations might result in + # different cuts to be maximal. + for target in [RUTarget.ACTIVATION, RUTarget.TOTAL]: + if target in ru_dict: + ru_dict[target] = np.array(list(detailed_ru[target].values())) + + assert all(v.ndim == 1 for v in ru_dict.values()) + if RUTarget.ACTIVATION in ru_targets and RUTarget.TOTAL in ru_targets: + assert ru_dict[RUTarget.ACTIVATION].shape == ru_dict[RUTarget.TOTAL].shape + + assert len(ru_dict) == len(ru_targets), (f'Mismatch between the number of computed and requested metrics.' 
+ f'Requested {ru_targets}') + return ru_dict def get_quantization_candidates(self, mp_cfg) \ - -> Tuple[Dict[BaseNode, NodeActivationQuantizationConfig], Dict[BaseNode, NodeWeightsQuantizationConfig]]: + -> Tuple[Dict[str, NodeActivationQuantizationConfig], Dict[str, NodeWeightsQuantizationConfig]]: """ Retrieve quantization candidates objects for weights and activations from the configuration list. @@ -87,71 +91,3 @@ def get_quantization_candidates(self, mp_cfg) \ act_qcs = {n.name: cfg.activation_quantization_cfg for n, cfg in node_qcs.items()} w_qcs = {n.name: cfg.weights_quantization_cfg for n, cfg in node_qcs.items()} return act_qcs, w_qcs - - def _weights_utilization(self, w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> Dict[BaseNode, float]: - """ - Compute weights utilization for configurable weights if configuration is passed, - or for non-configurable nodes otherwise. - - Args: - w_qcs: nodes quantization configuration to compute, or None. - - Returns: - Weight utilization per node. - """ - if w_qcs: - target_criterion = TargetInclusionCriterion.QConfigurable - bitwidth_mode = BitwidthMode.QCustom - else: - target_criterion = TargetInclusionCriterion.QNonConfigurable - bitwidth_mode = BitwidthMode.QDefaultSP - - _, nodes_util, _ = self.ru_calculator.compute_weights_utilization(target_criterion=target_criterion, - bitwidth_mode=bitwidth_mode, - w_qcs=w_qcs) - nodes_util = {n: u.bytes for n, u in nodes_util.items()} - return nodes_util - - def _activation_utilization(self, act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]]) \ - -> Optional[Dict[Any, float]]: - """ - Compute activation utilization using MaxCut for all quantized nodes if configuration is passed. - - Args: - act_qcs: nodes activation configuration or None. - - Returns: - Activation utilization per cut, or empty dict if no configuration was passed. 
- """ - # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already - # covered by the computation of configurable activations. - if not act_qcs: - return {} - - _, cuts_util, *_ = self.ru_calculator.compute_activation_utilization_by_cut( - TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs) - cuts_util = {c: u.bytes for c, u in cuts_util.items()} - return cuts_util - - def _bops_utilization(self, - act_qcs: Optional[Dict[BaseNode, NodeActivationQuantizationConfig]], - w_qcs: Optional[Dict[BaseNode, NodeWeightsQuantizationConfig]]) -> np.ndarray: - """ - Computes a resource utilization vector with the respective bit-operations (BOPS) count - according to the given mixed-precision configuration. - - Args: - act_qcs: nodes activation configuration or None. - w_qcs: nodes quantization configuration to compute, or None. - Either both are provided, or both are None. - - Returns: - A vector of node's BOPS count. - """ - assert [act_qcs, w_qcs].count(None) in [0, 2], 'act_qcs and w_qcs should both be provided or both be None.' - if act_qcs is None: - return np.array([]) - - _, detailed_bops = self.ru_calculator.compute_bops(TargetInclusionCriterion.Any, BitwidthMode.QCustom, - act_qcs=act_qcs, w_qcs=w_qcs) - return np.array(list(detailed_bops.values())) diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py index 9a473cad0..4189cc37a 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py @@ -13,37 +13,27 @@ # limitations under the License. 
# ============================================================================== -import copy from enum import Enum -import numpy as np -from typing import List, Callable, Dict +from typing import List, Callable from model_compression_toolkit.core import MixedPrecisionQuantizationConfig from model_compression_toolkit.core.common import Graph -from model_compression_toolkit.core.common.hessian import HessianInfoService -from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager -from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ - mp_integer_programming_search from model_compression_toolkit.core.common.framework_info import FrameworkInfo +from model_compression_toolkit.core.common.hessian import HessianInfoService +from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import \ + MixedPrecisionSearchManager +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \ + ResourceUtilization from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \ greedy_solution_refinement_procedure -from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute -from model_compression_toolkit.logger import Logger class BitWidthSearchMethod(Enum): - # When adding a new search_methods MP configuration method, these enum and factory dictionary - # should be updated with it's kind and a search_method implementation. 
INTEGER_PROGRAMMING = 0 -search_methods = { - BitWidthSearchMethod.INTEGER_PROGRAMMING: mp_integer_programming_search} - - -def search_bit_width(graph_to_search_cfg: Graph, +def search_bit_width(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, target_resource_utilization: ResourceUtilization, @@ -60,7 +50,7 @@ def search_bit_width(graph_to_search_cfg: Graph, target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. Args: - graph_to_search_cfg: Graph to search a MP configuration for. + graph: Graph to search a MP configuration for. fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize). fw_impl: FrameworkImplementation object with specific framework methods implementation. target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. @@ -75,17 +65,7 @@ def search_bit_width(graph_to_search_cfg: Graph, bit-width index on the node). """ - - # target_resource_utilization have to be passed. If it was not passed, the facade is not supposed to get here by now. 
- if target_resource_utilization is None: - Logger.critical("Target ResourceUtilization is required for the bit-width search method's configuration.") # pragma: no cover - - # Set graph for MP search - graph = copy.deepcopy(graph_to_search_cfg) # Copy graph before searching - if target_resource_utilization.bops_restricted(): - # TODO: we only need the virtual graph is both activations and weights are configurable - # Since Bit-operations count target resource utilization is set, we need to reconstruct the graph for the MP search - graph = substitute(graph, fw_impl.get_substitutions_virtual_weights_activation_coupling()) + assert target_resource_utilization.is_any_restricted() # If we only run weights compression with MP than no need to consider activation quantization when computing the # MP metric (it adds noise to the computation) @@ -93,33 +73,28 @@ def search_bit_width(graph_to_search_cfg: Graph, weight_only_restricted = tru.weight_restricted() and not (tru.activation_restricted() or tru.total_mem_restricted() or tru.bops_restricted()) - disable_activation_for_metric = weight_only_restricted or graph_to_search_cfg.is_single_activation_cfg() + disable_activation_for_metric = weight_only_restricted or not graph.has_any_configurable_activation() # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph, # even if a virtual graph was created (and is used only for BOPS utilization computation purposes) se = fw_impl.get_sensitivity_evaluator( - graph_to_search_cfg, + graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info, disable_activation_for_metric=disable_activation_for_metric, hessian_info_service=hessian_info_service) - # Instantiate a manager object + if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING: + raise NotImplementedError() + + # Search manager and LP are highly coupled, so LP search method was moved inside search manager. 
search_manager = MixedPrecisionSearchManager(graph, fw_info, fw_impl, se, - target_resource_utilization, - original_graph=graph_to_search_cfg) - - if search_method not in search_methods: - raise NotImplementedError() # pragma: no cover - - search_method_fn = search_methods[search_method] - # Search for the desired mixed-precision configuration - result_bit_cfg = search_method_fn(search_manager, - target_resource_utilization) + target_resource_utilization) + result_bit_cfg = search_manager.search() if mp_config.refine_mp_solution: result_bit_cfg = greedy_solution_refinement_procedure(result_bit_cfg, search_manager, target_resource_utilization) diff --git a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py index 862896197..c878dccfb 100644 --- a/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +++ b/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py @@ -12,11 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +import copy +from collections import defaultdict -from typing import Callable, Dict, List +from tqdm import tqdm + +from typing import Dict, List, Tuple import numpy as np +from model_compression_toolkit.constants import EPS from model_compression_toolkit.core.common import BaseNode from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation from model_compression_toolkit.core.common.framework_info import FrameworkInfo @@ -29,7 +34,10 @@ TargetInclusionCriterion, BitwidthMode from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_helper import \ MixedPrecisionRUHelper +from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ + MixedPrecisionIntegerLPSolver from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation +from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute from model_compression_toolkit.logger import Logger @@ -43,8 +51,7 @@ def __init__(self, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation, sensitivity_evaluator: SensitivityEvaluation, - target_resource_utilization: ResourceUtilization, - original_graph: Graph = None): + target_resource_utilization: ResourceUtilization): """ Args: @@ -54,96 +61,208 @@ def __init__(self, sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of a bit-width configuration for the MP model. target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it. - original_graph: In case we have a search over a virtual graph (if we have BOPS utilization target), then this argument - will contain the original graph (for config reconstruction purposes). 
""" - self.graph = graph - self.original_graph = graph if original_graph is None else original_graph self.fw_info = fw_info self.fw_impl = fw_impl + + self.original_graph = graph + # graph for mp search + self.mp_graph, self.using_virtual_graph = self._get_mp_graph(graph, target_resource_utilization) + del graph # so that it's not used by mistake + self.sensitivity_evaluator = sensitivity_evaluator + self.target_resource_utilization = target_resource_utilization + + self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info) self.layer_to_bitwidth_mapping = self.get_search_space() - self.compute_metric_fn = self.get_sensitivity_metric() - self._cuts = None - # To define RU Total constraints we need to compute weights and activations even if they have no constraints - # TODO currently this logic is duplicated in linear_programming.py - targets = target_resource_utilization.get_restricted_targets() - if RUTarget.TOTAL in targets: - targets = targets.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL} - self.ru_targets_to_compute = targets + self.ru_targets = target_resource_utilization.get_restricted_targets() + self.ru_helper = MixedPrecisionRUHelper(self.mp_graph, fw_info, fw_impl) - self.ru_helper = MixedPrecisionRUHelper(graph, fw_info, fw_impl) - self.target_resource_utilization = target_resource_utilization - self.min_ru_config = self.graph.get_min_candidates_config(fw_info) - self.max_ru_config = self.graph.get_max_candidates_config(fw_info) - self.min_ru = self.ru_helper.compute_utilization(self.ru_targets_to_compute, self.min_ru_config) - self.non_conf_ru_dict = self.ru_helper.compute_utilization(self.ru_targets_to_compute, None) + self.min_ru_config = self.mp_graph.get_min_candidates_config(fw_info) + self.max_ru_config = self.mp_graph.get_max_candidates_config(fw_info) + self.min_ru = self.ru_helper.compute_utilization(self.ru_targets, self.min_ru_config) - self.config_reconstruction_helper = 
ConfigReconstructionHelper(virtual_graph=self.graph, + self.config_reconstruction_helper = ConfigReconstructionHelper(virtual_graph=self.mp_graph, original_graph=self.original_graph) - def get_search_space(self) -> Dict[int, List[int]]: + def search(self) -> List[int]: """ - The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces - for the node). + Run mixed precision search. Returns: - The entire search space of the graph. + Indices of the selected bit-widths candidates. """ + candidates_sensitivity = self._build_sensitivity_mapping() + candidates_ru = self._compute_relative_ru_matrices() + rel_target_ru = self._get_relative_ru_constraint_per_mem_element() + solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, candidates_ru, rel_target_ru) + config = solver.run() - indices_mapping = {} - nodes_to_configure = self.graph.get_configurable_sorted_nodes(self.fw_info) - for idx, n in enumerate(nodes_to_configure): - # For each node, get all possible bitwidth indices for it - # (which is a list from 0 to the length of the candidates mp_config list of the node). - indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg))) # all search_methods space - return indices_mapping + if self.using_virtual_graph: + config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config) + return config - def get_sensitivity_metric(self) -> Callable: + def _get_relative_ru_constraint_per_mem_element(self) -> Dict[RUTarget, np.ndarray]: """ + Computes resource utilization constraint with respect to the minimal bit configuration, i.e. corresponding + constraint for each memory element is the relative utilization between the target utilization and + element's utilization for min-bit configuration. + + Returns: + A dictionary of relative resource utilization constraints per ru target. 
+ + Raises: + ValueError: if target resource utilization cannot be satisfied (utilization for the minimal bit + configuration exceeds the requested target utilization for any target). + """ + target_ru = self.target_resource_utilization.get_resource_utilization_dict(restricted_only=True) + rel_target_ru = { + ru_target: ru - self.min_ru[ru_target] for ru_target, ru in target_ru.items() + } + unsatisfiable_targets = { + ru_target.value: target_ru[ru_target] for ru_target, ru in rel_target_ru.items() if any(ru < 0) + } + if unsatisfiable_targets: + raise ValueError(f"The model cannot be quantized to meet the specified resource utilization for the " + f"following targets: {unsatisfiable_targets}") + return rel_target_ru + + def _build_sensitivity_mapping(self, eps: float = EPS) -> Dict[int, Dict[int, float]]: + """ + This function measures the sensitivity of a change in a bitwidth of a layer on the entire model. + It builds a mapping from a node's index, to its bitwidth's effect on the model sensitivity. + For each node and some possible node's bitwidth (according to the given search space), we use + the framework function compute_metric_fn in order to infer + a batch of images, and compute (using the inference results) the sensitivity metric of + the configured mixed-precision model. + + Args: + eps: Epsilon value to manually increase metric value (if necessary) for numerical stability - Returns: Return a function (from the framework implementation) to compute a metric that - indicates the similarity of the mixed-precision model (to the float model) for a given - mixed-precision configuration. + Returns: + Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to + the sensitivity of the model. """ - # Get from the framework an evaluation function on how a MP configuration, - # affects the expected loss.
- return self.sensitivity_evaluator.compute_metric + Logger.info('Starting to evaluate metrics') + layer_to_metrics_mapping = {} + + compute_metric = self.sensitivity_evaluator.compute_metric + if self.using_virtual_graph: + origin_max_config = self.config_reconstruction_helper.reconstruct_config_from_virtual_graph( + self.max_ru_config) + max_config_value = compute_metric(origin_max_config) + else: + max_config_value = compute_metric(self.max_ru_config) + + for node_idx, layer_possible_bitwidths_indices in tqdm(self.layer_to_bitwidth_mapping.items(), + total=len(self.layer_to_bitwidth_mapping)): + layer_to_metrics_mapping[node_idx] = {} + + for bitwidth_idx in layer_possible_bitwidths_indices: + if self.max_ru_config[node_idx] == bitwidth_idx: + # This is a computation of the metric for the max configuration, assign pre-calculated value + layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value + continue + + # Create a configuration that differs at one layer only from the baseline model + mp_model_configuration = self.max_ru_config.copy() + mp_model_configuration[node_idx] = bitwidth_idx + + # Build a distance matrix using the function we got from the framework implementation. 
+ if self.using_virtual_graph: + # Reconstructing original graph's configuration from virtual graph's configuration + origin_mp_model_configuration = \ + self.config_reconstruction_helper.reconstruct_config_from_virtual_graph( + mp_model_configuration, + changed_virtual_nodes_idx=[node_idx], + original_base_config=origin_max_config) + origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if + c != origin_mp_model_configuration[i]] + metric_value = compute_metric( + origin_mp_model_configuration, + origin_changed_nodes_indices, + origin_max_config) + else: + metric_value = compute_metric( + mp_model_configuration, + [node_idx], + self.max_ru_config) + + layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps) - def compute_resource_utilization_matrix(self, target: RUTarget) -> np.ndarray: + # Finalize distance metric mapping + self.finalize_distance_metric(layer_to_metrics_mapping) + + return layer_to_metrics_mapping + + def _get_mp_graph(self, graph: Graph, target_resource_utilization: ResourceUtilization) -> Tuple[Graph, bool]: """ - Computes and builds a resource utilization matrix, to be used for the mixed-precision search problem formalization. - Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0. + Get graph for mixed precision search. Virtual graph is built if bops is restricted and both activation and + weights are configurable. Args: - target: The resource target for which the resource utilization is calculated (a RUTarget value). + graph: input graph. + target_resource_utilization: target resource utilization. + + Returns: + Graph for mixed precision search (virtual or original), and a boolean flag whether a virtual graph has been + constructed. 
+ """ + if (target_resource_utilization.bops_restricted() and + graph.has_any_configurable_activation() and + graph.has_any_configurable_weights()): + mp_graph = substitute(copy.deepcopy(graph), + self.fw_impl.get_substitutions_virtual_weights_activation_coupling()) + return mp_graph, True + + return graph, False + + def get_search_space(self) -> Dict[int, List[int]]: + """ + The search space is a mapping from a node's index to a list of integers (possible bitwidths candidates indeces + for the node). Returns: - A resource utilization matrix of shape (num configurations, num memory elements). Num memory elements - depends on the target, e.g. num nodes or num cuts, for which utilization is computed. + The entire search space of the graph. """ - assert isinstance(target, RUTarget), f"{target} is not a valid resource target" - configurable_sorted_nodes = self.graph.get_configurable_sorted_nodes(self.fw_info) + indices_mapping = {} + for idx, n in enumerate(self.mp_topo_configurable_nodes): + # For each node, get all possible bitwidth indices for it + # (which is a list from 0 to the length of the candidates mp_config list of the node). + indices_mapping[idx] = list(range(len(n.candidates_quantization_cfg))) # all search_methods space + return indices_mapping + + def _compute_relative_ru_matrices(self) -> Dict[RUTarget, np.ndarray]: + """ + Computes and builds a resource utilization matrix for all restricted targets, to be used for the + mixed-precision search problem formalization. + Utilization is computed relative to the minimal configuration, i.e. utilization for it will be 0. - ru_matrix = [] - for c, c_n in enumerate(configurable_sorted_nodes): + Returns: + A dictionary containing resource utilization matrix of shape (num configurations, num memory elements) + per ru target. Num memory elements depends on the target, e.g. num cuts or 1 for cumulative metrics. 
+ """ + rus_per_candidate = defaultdict(list) + for c, c_n in enumerate(self.mp_topo_configurable_nodes): for candidate_idx in range(len(c_n.candidates_quantization_cfg)): if candidate_idx == self.min_ru_config[c]: - candidate_rus = self.min_ru[target] + candidate_rus = self.min_ru else: - candidate_rus = self.compute_node_ru_for_candidate(c, candidate_idx, target) + candidate_rus = self.compute_ru_for_candidate(c, candidate_idx) - ru_matrix.append(np.asarray(candidate_rus)) + for target, ru in candidate_rus.items(): + rus_per_candidate[target].append(ru) - np_ru_matrix = np.array(ru_matrix) - self.min_ru[target] # num configurations X num elements - return np_ru_matrix + # Each target contains a matrix of num configurations X num elements + relative_rus = {target: np.array(ru) - self.min_ru[target] for target, ru in rus_per_candidate.items()} + return relative_rus - def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, target: RUTarget) -> np.ndarray: + def compute_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int) -> Dict[RUTarget, np.ndarray]: """ Computes a resource utilization vector after replacing the given node's configuration candidate in the minimal target configuration with the given candidate index. @@ -151,13 +270,13 @@ def compute_node_ru_for_candidate(self, conf_node_idx: int, candidate_idx: int, Args: conf_node_idx: The index of a node in a sorted configurable nodes list. candidate_idx: Quantization config candidate to be used for the node's resource utilization computation. - target: The target for which the resource utilization is calculated (a RUTarget value). - Returns: Node's resource utilization vector. + Returns: + Node's resource utilization vector. 
""" cfg = self.replace_config_in_index(self.min_ru_config, conf_node_idx, candidate_idx) - return self.ru_helper.compute_utilization({target}, cfg)[target] + return self.ru_helper.compute_utilization(self.ru_targets, cfg) @staticmethod def replace_config_in_index(mp_cfg: List[int], idx: int, value: int) -> List[int]: @@ -191,7 +310,7 @@ def compute_resource_utilization_for_config(self, config: List[int]) -> Resource act_qcs, w_qcs = self.ru_helper.get_quantization_candidates(config) ru = self.ru_helper.ru_calculator.compute_resource_utilization( target_criterion=TargetInclusionCriterion.AnyQuantized, bitwidth_mode=BitwidthMode.QCustom, act_qcs=act_qcs, - w_qcs=w_qcs, ru_targets=self.ru_targets_to_compute, allow_unused_qcs=True) + w_qcs=w_qcs, ru_targets=self.ru_targets, allow_unused_qcs=True) return ru def finalize_distance_metric(self, layer_to_metrics_mapping: Dict[int, Dict[int, float]]): diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py index d2746da1b..afb03f06a 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py @@ -51,25 +51,34 @@ class ResourceUtilization: bops: float = np.inf def weight_restricted(self): - return self.weights_memory < np.inf + return self._is_restricted(self.weights_memory) def activation_restricted(self): - return self.activation_memory < np.inf + return self._is_restricted(self.activation_memory) def total_mem_restricted(self): - return self.total_memory < np.inf + return self._is_restricted(self.total_memory) def bops_restricted(self): - return self.bops < np.inf + return self._is_restricted(self.bops) - def get_resource_utilization_dict(self) -> Dict[RUTarget, float]: + def 
get_resource_utilization_dict(self, restricted_only: bool = False) -> Dict[RUTarget, float]: """ - Returns: a dictionary with the ResourceUtilization object's values for each resource utilization target. + Get resource utilization as a dictionary. + + Args: + restricted_only: whether to include only targets with restricted utilization. + + Returns: + A dictionary containing the resource utilization with targets as keys. """ - return {RUTarget.WEIGHTS: self.weights_memory, - RUTarget.ACTIVATION: self.activation_memory, - RUTarget.TOTAL: self.total_memory, - RUTarget.BOPS: self.bops} + ru_dict = {RUTarget.WEIGHTS: self.weights_memory, + RUTarget.ACTIVATION: self.activation_memory, + RUTarget.TOTAL: self.total_memory, + RUTarget.BOPS: self.bops} + if restricted_only: + ru_dict = {k: v for k, v in ru_dict.items() if self._is_restricted(v)} + return ru_dict def is_satisfied_by(self, ru: 'ResourceUtilization') -> bool: """ @@ -114,3 +123,6 @@ def get_summary_str(self, restricted: bool): if RUTarget.BOPS in targets: summary.append(f"BOPS: {self.bops}") return ', '.join(summary) + + def _is_restricted(self, v): + return v < np.inf diff --git a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py index 408e5a598..07f350d53 100644 --- a/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +++ b/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py @@ -431,8 +431,7 @@ def compute_node_activation_tensor_utilization(self, Returns: Node's activation utilization. 
""" - if qc and bitwidth_mode != BitwidthMode.QCustom: - raise ValueError(self.unexpected_qc_error) + self._validate_custom_qcs(qc, bitwidth_mode) if target_criterion: # only check whether the node meets the criterion @@ -470,9 +469,6 @@ def compute_bops(self, - Total BOPS count of the network. - Detailed BOPS count per node. """ - self._validate_custom_qcs(act_qcs, bitwidth_mode) - self._validate_custom_qcs(w_qcs, bitwidth_mode) - nodes_bops = {} for n in self.graph.get_topo_sorted_nodes(): w_qc = w_qcs.get(n.name) if w_qcs else None diff --git a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py index 1a3b2102c..4e5155ad4 100644 --- a/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +++ b/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py @@ -12,326 +12,146 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== - import numpy as np from pulp import * -from tqdm import tqdm -from typing import Dict, Tuple, Any, Optional +from typing import Dict, Tuple -from model_compression_toolkit.logger import Logger -from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import ResourceUtilization, RUTarget -from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_manager import MixedPrecisionSearchManager +from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import RUTarget # Limit ILP solver runtime in seconds SOLVER_TIME_LIMIT = 60 -def mp_integer_programming_search(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization = None) -> np.ndarray: - """ - Searching and returning a mixed-precision configuration using an ILP optimization solution. - It first builds a mapping from each layer's index (in the model) to a dictionary that maps the - bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. - Then, it creates a mapping from each node's index (in the graph) to a dictionary - that maps the bitwidth index to the contribution of configuring this node with this - bitwidth to the minimal possible resource utilization of the model. - Then, and using these mappings, it builds an LP problem and finds an optimal solution. - If a solution could not be found, exception is thrown. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_resource_utilization: Target resource utilization to constrain our LP problem with some resources limitations (like model' weights memory - consumption). - - Returns: - The mixed-precision configuration (1-D array of indices. Each indicates the bitwidth index of a node). 
+class MixedPrecisionIntegerLPSolver: + """ Integer Linear Programming solver for Mixed Precision. + Args: + layer_to_sensitivity_mapping: sensitivity per candidate per layer. + candidates_ru: resource utilization per candidate. + ru_constraints: resource utilization constraints corresponding to 'candidates_ru'. """ - - # Build a mapping from each layer's index (in the model) to a dictionary that maps the - # bitwidth index to the observed sensitivity of the model when using that bitwidth for that layer. - - if target_resource_utilization is None or search_manager is None: - Logger.critical("Invalid parameters: 'target_resource_utilization' and 'search_manager' must not be 'None' " - "for mixed-precision search. Ensure valid inputs are provided.") - - layer_to_metrics_mapping = _build_layer_to_metrics_mapping(search_manager, target_resource_utilization) - - # Init variables to find their values when solving the lp problem. - layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping = _init_problem_vars(layer_to_metrics_mapping) - - # Add all equations and inequalities that define the problem. - lp_problem = _formalize_problem(layer_to_indicator_vars_mapping, - layer_to_metrics_mapping, - layer_to_objective_vars_mapping, - target_resource_utilization, - search_manager) - - # Use default PULP solver. Limit runtime in seconds - solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT) - lp_problem.solve(solver=solver) # Try to solve the problem. - - assert lp_problem.status == LpStatusOptimal, Logger.critical( - "No solution was found during solving the LP problem") - Logger.info(f"ILP status: {LpStatus[lp_problem.status]}") - - # Take the bitwidth index only if its corresponding indicator is one. 
- config = np.asarray( - [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for - nbits_to_indicator - in layer_to_indicator_vars_mapping.values()] - ).flatten() - - if target_resource_utilization.bops_restricted(): - return search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(config) - else: - return config - - -def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[ - Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]: - """ - Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use, - and a variable for each indicator for whether we use the former variable or not. - - Args: - layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the - bitwidth index to the observed sensitivity of the model. - - Returns: - A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem, - and the second for indicators for each variable. 
- """ - - layer_to_indicator_vars_mapping = dict() - layer_to_objective_vars_mapping = dict() - - for layer, nbits_to_metric in layer_to_metrics_mapping.items(): - layer_to_indicator_vars_mapping[layer] = dict() - - for nbits in nbits_to_metric.keys(): - layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}", - lowBound=0, - upBound=1, - cat=LpInteger) - - layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0) - - return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping - - -def _formalize_problem(layer_to_indicator_vars_mapping: Dict[int, Dict[int, LpVariable]], - layer_to_metrics_mapping: Dict[int, Dict[int, float]], - layer_to_objective_vars_mapping: Dict[int, LpVariable], - target_resource_utilization: ResourceUtilization, - search_manager: MixedPrecisionSearchManager) -> LpProblem: - """ - Formalize the LP problem by defining all inequalities that define the solution space. - - Args: - layer_to_indicator_vars_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to - indicator variable. - layer_to_metrics_mapping: Dictionary that maps each node's index to a dictionary of bitwidth to sensitivity - evaluation. - layer_to_objective_vars_mapping: Dictionary that maps each node's index to a bitwidth variable we find its - value. - target_resource_utilization: Target resource utilization to reduce our feasible solution space. - search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. - - Returns: - The formalized LP problem. - """ - - lp_problem = LpProblem() # minimization problem by default - lp_problem += lpSum([layer_to_objective_vars_mapping[layer] for layer in - layer_to_metrics_mapping.keys()]) # Objective (minimize acc loss) - - for layer in layer_to_metrics_mapping.keys(): - # Use every bitwidth for every layer with its indicator. 
- lp_problem += lpSum([indicator * layer_to_metrics_mapping[layer][nbits] - for nbits, indicator in layer_to_indicator_vars_mapping[layer].items()]) == \ - layer_to_objective_vars_mapping[layer] - - # Constraint of only one indicator==1 - lp_problem += lpSum( - [v for v in layer_to_indicator_vars_mapping[layer].values()]) == 1 - - # Bound the feasible solution space with the desired resource utilization values. - # Creates separate constraints for weights utilization and activation utilization. - if target_resource_utilization is not None: + def __init__(self, layer_to_sensitivity_mapping: Dict[int, Dict[int, float]], + candidates_ru: Dict[RUTarget, np.ndarray], + ru_constraints: Dict[RUTarget, np.ndarray]): + self.layer_to_sensitivity_mapping = layer_to_sensitivity_mapping + self.candidates_ru = candidates_ru + self.ru_constraints = ru_constraints + + self.layer_to_indicator_vars_mapping, self.layer_to_objective_vars_mapping = ( + self._init_problem_vars(layer_to_sensitivity_mapping)) + + def run(self) -> List[int]: + """ + Build and solve an ILP optimization problem. + + Returns: + The mixed-precision configuration (A list of indices. Each indicates the bitwidth index of a node). + + """ + # Add all equations and inequalities that define the problem. + lp_problem = self._formalize_problem() + + # Use default PULP solver. Limit runtime in seconds + solver = PULP_CBC_CMD(timeLimit=SOLVER_TIME_LIMIT) + lp_problem.solve(solver=solver) # Try to solve the problem. + + if lp_problem.status != LpStatusOptimal: + raise RuntimeError(f'No solution was found for the LP problem, with status {lp_problem.status}') + + # Take the bitwidth index only if its corresponding indicator is one. 
+ config = np.asarray( + [[nbits for nbits, indicator in nbits_to_indicator.items() if indicator.varValue == 1.0] for + nbits_to_indicator + in self.layer_to_indicator_vars_mapping.values()] + ).flatten() + + return config.tolist() + + @staticmethod + def _init_problem_vars(layer_to_metrics_mapping: Dict[int, Dict[int, float]]) -> Tuple[ + Dict[int, Dict[int, LpVariable]], Dict[int, LpVariable]]: + """ + Initialize the LP problem variables: Variable for each layer as to the index of the bitwidth it should use, + and a variable for each indicator for whether we use the former variable or not. + + Args: + layer_to_metrics_mapping: Mapping from each layer's index (in the model) to a dictionary that maps the + bitwidth index to the observed sensitivity of the model. + + Returns: + A tuple of two dictionaries: One from a layer to the variable for the bitwidth problem, + and the second for indicators for each variable. + """ + + layer_to_indicator_vars_mapping = dict() + layer_to_objective_vars_mapping = dict() + + for layer, nbits_to_metric in layer_to_metrics_mapping.items(): + layer_to_indicator_vars_mapping[layer] = dict() + + for nbits in nbits_to_metric.keys(): + layer_to_indicator_vars_mapping[layer][nbits] = LpVariable(f"layer_{layer}_{nbits}", + lowBound=0, + upBound=1, + cat=LpInteger) + + layer_to_objective_vars_mapping[layer] = LpVariable(f"s_{layer}", 0) + + return layer_to_indicator_vars_mapping, layer_to_objective_vars_mapping + + def _formalize_problem(self) -> LpProblem: + """ + Formalize the LP problem by defining all inequalities that define the solution space. + + Returns: + The formalized LP problem. + """ + + lp_problem = LpProblem() # minimization problem by default + lp_problem += lpSum([self.layer_to_objective_vars_mapping[layer] for layer in + self.layer_to_sensitivity_mapping.keys()]) # Objective (minimize acc loss) + + for layer in self.layer_to_sensitivity_mapping.keys(): + # Use every bitwidth for every layer with its indicator. 
+ lp_problem += lpSum([indicator * self.layer_to_sensitivity_mapping[layer][nbits] + for nbits, indicator in self.layer_to_indicator_vars_mapping[layer].items()]) == \ + self.layer_to_objective_vars_mapping[layer] + + # Constraint of only one indicator==1 + lp_problem += lpSum( + [v for v in self.layer_to_indicator_vars_mapping[layer].values()]) == 1 + + # Bound the feasible solution space with the desired resource utilization values. + self._add_ru_constraints(lp_problem=lp_problem) + + return lp_problem + + def _add_ru_constraints(self, lp_problem: LpProblem): + """ + Adding targets constraints for the Lp problem for the given target resource utilization. + The update to the Lp problem object is done inplace. + + Args: + lp_problem: An Lp problem object to add constraint to. + """ indicators = [] - for layer in layer_to_metrics_mapping.keys(): - for _, indicator in layer_to_indicator_vars_mapping[layer].items(): - indicators.append(indicator) - - indicators_arr = np.array(indicators) - indicators_matrix = np.diag(indicators_arr) - - _add_ru_constraints(search_manager=search_manager, - target_resource_utilization=target_resource_utilization, - indicators_matrix=indicators_matrix, - lp_problem=lp_problem, - non_conf_ru_dict=search_manager.non_conf_ru_dict) - else: # pragma: no cover - Logger.critical("Unable to execute mixed-precision search: 'target_resource_utilization' is None. " - "A valid 'target_resource_utilization' is required.") - return lp_problem - - -def _add_ru_constraints(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization, - indicators_matrix: np.ndarray, - lp_problem: LpProblem, - non_conf_ru_dict: Dict[RUTarget, np.ndarray]): - """ - Adding targets constraints for the Lp problem for the given target resource utilization. - The update to the Lp problem object is done inplace. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for resource utilization constraints formalization. 
- target_resource_utilization: Target resource utilization. - indicators_matrix: A diagonal matrix of the Lp problem's indicators. - lp_problem: An Lp problem object to add constraint to. - non_conf_ru_dict: A non-configurable nodes' resource utilization vectors for the constrained targets. - """ - ru_indicated_vectors = {} - # targets to add constraints for - constraints_targets = target_resource_utilization.get_restricted_targets() - # to add constraints for Total target we need to compute weight and activation - targets_to_compute = constraints_targets - if RUTarget.TOTAL in constraints_targets: - targets_to_compute = targets_to_compute.union({RUTarget.ACTIVATION, RUTarget.WEIGHTS}) - {RUTarget.TOTAL} - - for target in targets_to_compute: - ru_matrix = search_manager.compute_resource_utilization_matrix(target) # num elements X num configurations - indicated_ru_matrix = np.matmul(ru_matrix.T, indicators_matrix) # num elements X num configurations - - # Sum the indicated values over all configurations, and add the value for minimal configuration once. - # Indicated utilization values are relative to the minimal configuration, i.e. they represent the extra memory - # that would be required if that configuration is selected). - # Each element in a vector is an lp object representing the configurations sum term for a memory element. - ru_vec = indicated_ru_matrix.sum(axis=1) + search_manager.min_ru[target] - - non_conf_ru_vec = non_conf_ru_dict[target] - if non_conf_ru_vec is not None and non_conf_ru_vec.size: - # add non-conf value as additional mem elements so that they get aggregated - ru_vec = np.concatenate([ru_vec, non_conf_ru_vec]) - ru_indicated_vectors[target] = ru_vec - - # Add constraints only for the restricted targets in target resource utilization. - # Adding activation constraints modifies the lp term in ru_indicated_vectors, so if both activation and total - # are restricted we first add the constraints for total. 
- if RUTarget.TOTAL in constraints_targets and RUTarget.ACTIVATION in constraints_targets: - constraints_targets.remove(RUTarget.ACTIVATION) - constraints_targets = list(constraints_targets) + [RUTarget.ACTIVATION] - for target in constraints_targets: - target_resource_utilization_value = target_resource_utilization.get_resource_utilization_dict()[target] - aggr_ru = _aggregate_for_lp(ru_indicated_vectors, target) - for v in aggr_ru: - if isinstance(v, float): - if v > target_resource_utilization_value: - Logger.critical( - f"The model cannot be quantized to meet the specified target resource utilization {target.value} " - f"with the value {target_resource_utilization_value}.") # pragma: no cover - else: - lp_problem += v <= target_resource_utilization_value - - -def _aggregate_for_lp(targets_ru_vec: Dict[RUTarget, Any], target: RUTarget) -> list: - """ - Aggregate resource utilization values for the LP. - - Args: - targets_ru_vec: resource utilization vectors for all precomputed targets. - target: resource utilization target. - - Returns: - Aggregated resource utilization. - """ - if target == RUTarget.TOTAL: - w = lpSum(targets_ru_vec[RUTarget.WEIGHTS]) - act_ru_vec = targets_ru_vec[RUTarget.ACTIVATION] - return [w + v for v in act_ru_vec] - - if target in [RUTarget.WEIGHTS, RUTarget.BOPS]: - return [lpSum(targets_ru_vec[target])] - - if target == RUTarget.ACTIVATION: - # for max aggregation, each value constitutes a separate constraint - return list(targets_ru_vec[target]) - - raise ValueError(f'Unexpected target {target}.') # pragma: no cover - - -def _build_layer_to_metrics_mapping(search_manager: MixedPrecisionSearchManager, - target_resource_utilization: ResourceUtilization, - eps: float = EPS) -> Dict[int, Dict[int, float]]: - """ - This function measures the sensitivity of a change in a bitwidth of a layer on the entire model. - It builds a mapping from a node's index, to its bitwidht's effect on the model sensitivity. 
- For each node and some possible node's bitwidth (according to the given search space), we use - the framework function compute_metric_fn in order to infer - a batch of images, and compute (using the inference results) the sensitivity metric of - the configured mixed-precision model. - - Args: - search_manager: MixedPrecisionSearchManager object to be used for problem formalization. - target_resource_utilization: ResourceUtilization to constrain our LP problem with some resources limitations - (like model' weights memory consumption). - eps: Epsilon value to manually increase metric value (if necessary) for numerical stability - - Returns: - Mapping from each node's index in a graph, to a dictionary from the bitwidth index (of this node) to - the sensitivity of the model. - - """ - - Logger.info('Starting to evaluate metrics') - layer_to_metrics_mapping = {} - - if target_resource_utilization.bops_restricted(): - origin_max_config = search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph(search_manager.max_ru_config) - max_config_value = search_manager.compute_metric_fn(origin_max_config) - else: - max_config_value = search_manager.compute_metric_fn(search_manager.max_ru_config) - - for node_idx, layer_possible_bitwidths_indices in tqdm(search_manager.layer_to_bitwidth_mapping.items(), - total=len(search_manager.layer_to_bitwidth_mapping)): - layer_to_metrics_mapping[node_idx] = {} - - for bitwidth_idx in layer_possible_bitwidths_indices: - if search_manager.max_ru_config[node_idx] == bitwidth_idx: - # This is a computation of the metric for the max configuration, assign pre-calculated value - layer_to_metrics_mapping[node_idx][bitwidth_idx] = max_config_value - continue - - # Create a configuration that differs at one layer only from the baseline model - mp_model_configuration = search_manager.max_ru_config.copy() - mp_model_configuration[node_idx] = bitwidth_idx - - # Build a distance matrix using the function we got from the framework 
implementation. - if target_resource_utilization.bops_restricted(): - # Reconstructing original graph's configuration from virtual graph's configuration - origin_mp_model_configuration = \ - search_manager.config_reconstruction_helper.reconstruct_config_from_virtual_graph( - mp_model_configuration, - changed_virtual_nodes_idx=[node_idx], - original_base_config=origin_max_config) - origin_changed_nodes_indices = [i for i, c in enumerate(origin_max_config) if - c != origin_mp_model_configuration[i]] - metric_value = search_manager.compute_metric_fn( - origin_mp_model_configuration, - origin_changed_nodes_indices, - origin_max_config) - else: - metric_value = search_manager.compute_metric_fn( - mp_model_configuration, - [node_idx], - search_manager.max_ru_config) - - layer_to_metrics_mapping[node_idx][bitwidth_idx] = max(metric_value, max_config_value + eps) - - # Finalize distance metric mapping - search_manager.finalize_distance_metric(layer_to_metrics_mapping) - - return layer_to_metrics_mapping + for layer in self.layer_to_sensitivity_mapping: + indicators.extend(list(self.layer_to_indicator_vars_mapping[layer].values())) + indicators_vec = np.array(indicators) + + for target, ru_matrix in self.candidates_ru.items(): + # We expect 2d matrix of shape (num candidates, m). For cumulative metrics (weights, bops) m=1 - overall + # utilization. For max metrics (activation, total) m=num memory elements (max element depends on configuration) + assert ru_matrix.ndim == 2 + if target in [RUTarget.WEIGHTS, RUTarget.BOPS]: + assert ru_matrix.shape[1] == 1 + + indicated_ru_matrix = ru_matrix.T * indicators_vec + # build lp sum term over all candidates + ru_vec = indicated_ru_matrix.sum(axis=1) + + # For cumulative metrics a single constraint is added, for max metrics a separate constraint + # is added for each memory element (each element < target => max element < target). 
+ assert len(ru_vec) == len(self.ru_constraints[target]) + for v, c in zip(ru_vec, self.ru_constraints[target]): + lp_problem += v <= c diff --git a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py index 16c49ad53..7277d662e 100644 --- a/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +++ b/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py @@ -63,7 +63,7 @@ def greedy_solution_refinement_procedure(mp_solution: List[int], # layer has max config in the given solution, nothing to optimize continue - current_node = search_manager.graph.get_configurable_sorted_nodes(search_manager.fw_info)[node_idx] + current_node = search_manager.mp_topo_configurable_nodes[node_idx] node_candidates = current_node.candidates_quantization_cfg # only weights kernel attribute is quantized with weights mixed precision diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py index d9bbb6b8b..60b728065 100644 --- a/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/qat/qat_test.py @@ -308,7 +308,7 @@ def run_test(self, **kwargs): def compare(self, qat_ready_model, quantization_info): - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_mp_cfg)) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_mp_cfg) # check that quantizer gets multiple bits configuration for layer in qat_ready_model.layers: diff --git a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py index 252a38d9a..5e4c2401a 100644 --- 
a/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py +++ b/tests/keras_tests/feature_networks_tests/feature_networks/weights_mixed_precision_tests.py @@ -259,7 +259,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [1, 1]).all() + assert quantization_info.mixed_precision_cfg == [1, 1] for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 16) @@ -300,7 +300,7 @@ def create_networks(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - self.unit_test.assertTrue((quantization_info.mixed_precision_cfg != 0).any()) + self.unit_test.assertTrue(any(i for i in quantization_info.mixed_precision_cfg)) for i in range(32): # quantized per channel self.unit_test.assertTrue( @@ -325,7 +325,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [2, 2]).all() + assert quantization_info.mixed_precision_cfg == [2, 2] for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 4) @@ -443,7 +443,7 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert (quantization_info.mixed_precision_cfg == [0, 1]).all() + assert quantization_info.mixed_precision_cfg == [0, 1] 
for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) @@ -466,8 +466,8 @@ def get_resource_utilization(self): def _compare(self, quantized_model, float_model, input_x=None, quantization_info=None): conv_layers = get_layers_from_model_by_type(quantized_model, layers.Conv2D) - assert any([(quantization_info.mixed_precision_cfg == [1, 0]).all(), - (quantization_info.mixed_precision_cfg == [0, 1]).all()]) + assert any([quantization_info.mixed_precision_cfg == [1, 0], + quantization_info.mixed_precision_cfg == [0, 1]]) for i in range(32): # quantized per channel self.unit_test.assertTrue( np.unique(conv_layers[0].get_quantized_weights()['kernel'][:, :, :, i]).flatten().shape[0] <= 256) diff --git a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py index 36de90950..b9c94bde1 100644 --- a/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py +++ b/tests/keras_tests/non_parallel_tests/test_lp_search_bitwidth.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== +from unittest.mock import Mock + import numpy as np import unittest @@ -25,9 +27,8 @@ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_search_facade import search_bit_width, \ BitWidthSearchMethod from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \ - mp_integer_programming_search + MixedPrecisionIntegerLPSolver from model_compression_toolkit.core.common.model_collector import ModelCollector -from model_compression_toolkit.core.common.quantization.bit_width_config import BitWidthConfig from model_compression_toolkit.core.common.quantization.core_config import CoreConfig from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_computation import \ calculate_quantization_params @@ -42,7 +43,6 @@ from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.latest import \ get_op_quantization_configs from tests.keras_tests.tpc_keras import get_weights_only_mp_tpc_keras -from pulp import lpSum class MockReconstructionHelper: @@ -57,144 +57,122 @@ def reconstruct_config_from_virtual_graph(self, class MockMixedPrecisionSearchManager: - def __init__(self, layer_to_ru_mapping): + def __init__(self, layer_to_ru_mapping, ru_targets): + self.ru_targets = ru_targets self.layer_to_bitwidth_mapping = {0: [0, 1, 2]} self.layer_to_ru_mapping = layer_to_ru_mapping - self.compute_metric_fn = lambda x, y=None, z=None: {0: 2, 1: 1, 2: 0}[x[0]] - self.min_ru = {RUTarget.WEIGHTS: [1], - RUTarget.ACTIVATION: [1], - RUTarget.BOPS: [1]} # minimal resource utilization in the tests layer_to_ru_mapping + self.min_ru = {t: np.array([1]) for t in ru_targets} # minimal resource utilization in the tests layer_to_ru_mapping self.max_ru_config = [0] self.config_reconstruction_helper = MockReconstructionHelper() - self.non_conf_ru_dict = {RUTarget.WEIGHTS: None, RUTarget.ACTIVATION: None, 
RUTarget.BOPS: None} - def compute_resource_utilization_matrix(self, target): - # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test) - if target == RUTarget.WEIGHTS: - ru_matrix = [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - elif target == RUTarget.ACTIVATION: - ru_matrix = [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - elif target == RUTarget.BOPS: - ru_matrix = [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] - else: - raise ValueError('Not supposed to get here') - return np.array(ru_matrix).T + def build_sensitivity_mapping(self): + return {0: {0: 0, 1: 1, 2: 2}} - def finalize_distance_metric(self, d): - return d + def compute_resource_utilization_matrices(self): + # minus 1 is normalization by the minimal resource utilization (which is always 1 in this test) + ru = { + RUTarget.WEIGHTS: + [np.flip(np.array([ru.weights_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.ACTIVATION: + [np.flip(np.array([ru.activation_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.BOPS: + [np.flip(np.array([ru.bops - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))], + RUTarget.TOTAL: + [np.flip(np.array([ru.total_memory - 1 for _, ru in self.layer_to_ru_mapping[0].items()]))] + } + return {k: np.array(v).T for k, v in ru.items() if k in self.ru_targets} class TestLpSearchBitwidth(unittest.TestCase): + def _execute(self, mock_search_mgr, target_resource_utilization): + candidates_sensitivity = mock_search_mgr.build_sensitivity_mapping() + candidates_ru = mock_search_mgr.compute_resource_utilization_matrices() + min_ru = mock_search_mgr.min_ru + ru_constraints = {k: v - min_ru[k] for k, v in target_resource_utilization.get_resource_utilization_dict(restricted_only=True).items()} + lp_solver = MixedPrecisionIntegerLPSolver(candidates_sensitivity, 
candidates_ru, ru_constraints) + return lp_solver.run() + def test_search_weights_only(self): target_resource_utilization = ResourceUtilization(weights_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1), 1: ResourceUtilization(weights_memory=2), 0: ResourceUtilization(weights_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) - - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS}) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(weights_memory=0) # Infeasible solution! with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization(weights_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result target_resource_utilization = None # target ResourceUtilization is not defined! 
with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - def test_search_weights_only_with_non_conf(self): - target_resource_utilization = ResourceUtilization(weights_memory=2+11) - layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1), - 1: ResourceUtilization(weights_memory=2), - 0: ResourceUtilization(weights_memory=3)} - } - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) - mock_search_manager.non_conf_ru_dict = {RUTarget.WEIGHTS: np.array([5, 6])} - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) - - self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 1) - - # make sure non_conf was taken into account and lower target has a different solution - target_resource_utilization = ResourceUtilization(weights_memory=2 + 10.9) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) - self.assertFalse(bit_cfg[0] == 1) + with self.assertRaises(Exception): + self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=np.inf)) def test_search_activation_only(self): target_resource_utilization = ResourceUtilization(activation_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(activation_memory=1), 1: ResourceUtilization(activation_memory=2), 0: ResourceUtilization(activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.ACTIVATION}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, 
target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(activation_memory=0) # Infeasible solution! with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization( - activation_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, + target_resource_utilization=ResourceUtilization(activation_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result def test_search_weights_and_activation(self): target_resource_utilization = ResourceUtilization(weights_memory=2, activation_memory=2) layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), 1: ResourceUtilization(weights_memory=2, activation_memory=2), 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.WEIGHTS, RUTarget.ACTIVATION}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) target_resource_utilization = ResourceUtilization(weights_memory=0, activation_memory=0) # Infeasible solution! 
with self.assertRaises(Exception): - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=ResourceUtilization(weights_memory=np.inf, - activation_memory=np.inf)) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=ResourceUtilization(weights_memory=1000, + activation_memory=1000)) self.assertTrue(len(bit_cfg) == 1) - self.assertTrue(bit_cfg[0] == 0) # ResourceUtilization is Inf so expecting for the maximal bit-width result + self.assertTrue(bit_cfg[0] == 0) # expecting for the maximal bit-width result def test_search_total_resource_utilization(self): target_resource_utilization = ResourceUtilization(total_memory=4) - layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1), - 1: ResourceUtilization(weights_memory=2, activation_memory=2), - 0: ResourceUtilization(weights_memory=3, activation_memory=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + layer_to_ru_mapping = {0: {2: ResourceUtilization(weights_memory=1, activation_memory=1, total_memory=2), + 1: ResourceUtilization(weights_memory=2, activation_memory=2, total_memory=4), + 0: ResourceUtilization(weights_memory=3, activation_memory=3, total_memory=6)}} + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.TOTAL}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) @@ -204,10 +182,9 @@ def test_search_bops_ru(self): layer_to_ru_mapping = {0: {2: ResourceUtilization(bops=1), 1: 
ResourceUtilization(bops=2), 0: ResourceUtilization(bops=3)}} - mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping) + mock_search_manager = MockMixedPrecisionSearchManager(layer_to_ru_mapping, {RUTarget.BOPS}) - bit_cfg = mp_integer_programming_search(mock_search_manager, - target_resource_utilization=target_resource_utilization) + bit_cfg = self._execute(mock_search_manager, target_resource_utilization=target_resource_utilization) self.assertTrue(len(bit_cfg) == 1) self.assertTrue(bit_cfg[0] == 1) @@ -272,32 +249,14 @@ def representative_data_gen(): representative_data_gen, fw_info=fw_info) - cfg = search_bit_width(graph_to_search_cfg=graph, + cfg = search_bit_width(graph=graph, fw_info=DEFAULT_KERAS_INFO, fw_impl=keras_impl, - target_resource_utilization=ResourceUtilization(np.inf), + target_resource_utilization=ResourceUtilization(weights_memory=100), mp_config=core_config.mixed_precision_config, representative_data_gen=representative_data_gen, search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) - with self.assertRaises(Exception): - cfg = search_bit_width(graph_to_search_cfg=graph, - fw_info=DEFAULT_KERAS_INFO, - fw_impl=keras_impl, - target_resource_utilization=ResourceUtilization(np.inf), - mp_config=core_config.mixed_precision_config, - representative_data_gen=representative_data_gen, - search_method=None) - - with self.assertRaises(Exception): - cfg = search_bit_width(graph_to_search_cfg=graph, - fw_info=DEFAULT_KERAS_INFO, - fw_impl=keras_impl, - target_resource_utilization=None, - mp_config=core_config.mixed_precision_config, - representative_data_gen=representative_data_gen, - search_method=BitWidthSearchMethod.INTEGER_PROGRAMMING) - def test_mixed_precision_search_facade(self): core_config_avg_weights = CoreConfig(quantization_config=DEFAULTCONFIG, mixed_precision_config=MixedPrecisionQuantizationConfig(compute_mse, diff --git a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py 
b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py index 120ef70a9..7c830a4b6 100644 --- a/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py +++ b/tests/keras_tests/non_parallel_tests/test_tensorboard_writer.py @@ -162,7 +162,8 @@ def plot_tensor_sizes(self, core_config): fqc=fqc, network_editor=[], quant_config=cfg, - target_resource_utilization=mct.core.ResourceUtilization(), + target_resource_utilization=mct.core.ResourceUtilization(weights_memory=73, + activation_memory=191), n_iter=1, analyze_similarity=True, mp_cfg=mp_cfg) diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py index dfc9edb13..9e2fbf6d0 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_activation_test.py @@ -72,7 +72,7 @@ def compare(self, quantized_model, float_model, input_x=None, quantization_info: raise NotImplementedError def verify_config(self, result_config, expected_config): - self.unit_test.assertTrue(all(result_config == expected_config), + self.unit_test.assertTrue(result_config == expected_config, f"Configuration mismatch: expected {expected_config} but got {result_config}.") diff --git a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py index f09fb5b53..1dd065fe0 100644 --- a/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/mixed_precision_weights_test.py @@ -335,7 +335,7 @@ def get_resource_utilization(self): return ResourceUtilization(80) def compare(self, quantized_models, float_model, input_x=None, quantization_info=None): - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == self.expected_config)) + 
self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == self.expected_config) class MixedPrecisionWeightsTestNet(torch.nn.Module): diff --git a/tests/pytorch_tests/model_tests/feature_models/qat_test.py b/tests/pytorch_tests/model_tests/feature_models/qat_test.py index 73154073c..e650feb44 100644 --- a/tests/pytorch_tests/model_tests/feature_models/qat_test.py +++ b/tests/pytorch_tests/model_tests/feature_models/qat_test.py @@ -289,7 +289,7 @@ def run_test(self): input_x=self.representative_data_gen(), quantization_info=quantization_info) - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0])) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 0, 0, 1, 0]) # check that quantizer gets multiple bits configuration for _, layer in qat_ready_model.named_children(): @@ -336,7 +336,7 @@ def run_test(self): quantization_info=quantization_info) # check that MP search doesn't return 8 bits configuration for all layers - self.unit_test.assertTrue(all(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0])) + self.unit_test.assertTrue(quantization_info.mixed_precision_cfg == [1, 1, 0, 0, 0]) # check that quantizer gets multiple bits configuration for _, layer in qat_ready_model.named_children(): diff --git a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py index 5a5fbeee5..7d29842af 100644 --- a/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py +++ b/tests_pytest/common_tests/unit_tests/core/mixed_precision/test_greedy_solution_refinement.py @@ -25,7 +25,7 @@ @pytest.fixture def search_manager(): manager = Mock() - manager.graph.get_configurable_sorted_nodes = MagicMock() + manager.mp_topo_configurable_nodes = MagicMock() manager.fw_info.get_kernel_op_attributes = MagicMock() manager.replace_config_in_index = MagicMock( 
side_effect=lambda config, idx, candidate: ( @@ -105,7 +105,7 @@ def test_greedy_solution_refinement_procedure( node_mock = Mock() node_mock.candidates_quantization_cfg = candidate_configs(weight_bits_dict_0, act_bits_0, weight_bits_dict_1, act_bits_1) - search_manager.graph.get_configurable_sorted_nodes.return_value = [node_mock] + search_manager.mp_topo_configurable_nodes = [node_mock] search_manager.compute_resource_utilization_for_config = MagicMock(side_effect=lambda config: { 0: ResourceUtilization(**alternative_candidate_resources_usage),