1212# See the License for the specific language governing permissions and
1313# limitations under the License.
1414# ==============================================================================
15- from typing import List , Set , Dict , Optional , Tuple , Any
15+ from typing import List , Set , Dict , Tuple
1616
1717import numpy as np
1818
1919from model_compression_toolkit .core import FrameworkInfo
20- from model_compression_toolkit .core .common import Graph , BaseNode
20+ from model_compression_toolkit .core .common import Graph
2121from model_compression_toolkit .core .common .framework_implementation import FrameworkImplementation
2222from model_compression_toolkit .core .common .mixed_precision .resource_utilization_tools .resource_utilization import \
2323 RUTarget
@@ -36,42 +36,46 @@ def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImple
3636 self .fw_impl = fw_impl
3737 self .ru_calculator = ResourceUtilizationCalculator (graph , fw_impl , fw_info )
3838
39- def compute_utilization (self , ru_targets : Set [RUTarget ], mp_cfg : Optional [ List [int ] ]) -> Dict [RUTarget , np .ndarray ]:
39+ def compute_utilization (self , ru_targets : Set [RUTarget ], mp_cfg : List [int ]) -> Dict [RUTarget , np .ndarray ]:
4040 """
41- Compute utilization of requested targets for a specific configuration in the format expected by LP problem
42- formulation namely a vector of ru values for relevant memory elements (nodes or cuts) in a constant order
43- (between calls) .
41+ Compute utilization of requested targets for a specific configuration:
42+ for weights and bops - total utilization,
43+ for activations and total - utilization per cut .
4444
4545 Args:
4646 ru_targets: resource utilization targets to compute.
4747 mp_cfg: a list of candidates indices for configurable layers.
4848
4949 Returns:
50- Dict of the computed utilization per target.
50+ Dict of the computed utilization per target, as 1d vector .
5151 """
52-
53- ru = {}
54- act_qcs , w_qcs = self .get_quantization_candidates (mp_cfg ) if mp_cfg else (None , None )
55- if RUTarget .WEIGHTS in ru_targets :
56- wu = self ._weights_utilization (w_qcs )
57- ru [RUTarget .WEIGHTS ] = np .array (list (wu .values ()))
58-
59- if RUTarget .ACTIVATION in ru_targets :
60- au = self ._activation_utilization (act_qcs )
61- ru [RUTarget .ACTIVATION ] = np .array (list (au .values ()))
62-
63- if RUTarget .BOPS in ru_targets :
64- ru [RUTarget .BOPS ] = self ._bops_utilization (act_qcs = act_qcs , w_qcs = w_qcs )
65-
66- if RUTarget .TOTAL in ru_targets :
67- raise ValueError ('Total target should be computed based on weights and activations targets.' )
68-
69- assert len (ru ) == len (ru_targets ), (f'Mismatch between the number of computed and requested metrics.'
70- f'Requested { ru_targets } ' )
71- return ru
52+ act_qcs , w_qcs = self .get_quantization_candidates (mp_cfg )
53+
54+ ru , detailed_ru = self .ru_calculator .compute_resource_utilization (TargetInclusionCriterion .AnyQuantized ,
55+ BitwidthMode .QCustom ,
56+ act_qcs = act_qcs ,
57+ w_qcs = w_qcs ,
58+ ru_targets = ru_targets ,
59+ allow_unused_qcs = True ,
60+ return_detailed = True )
61+
62+ ru_dict = {k : np .array ([v ]) for k , v in ru .get_resource_utilization_dict (restricted_only = True ).items ()}
63+ # For activation and total we need utilization per cut, as different mp configurations might result in
64+ # different cuts to be maximal.
65+ for target in [RUTarget .ACTIVATION , RUTarget .TOTAL ]:
66+ if target in ru_dict :
67+ ru_dict [target ] = np .array (list (detailed_ru [target ].values ()))
68+
69+ assert all (v .ndim == 1 for v in ru_dict .values ())
70+ if RUTarget .ACTIVATION in ru_targets and RUTarget .TOTAL in ru_targets :
71+ assert ru_dict [RUTarget .ACTIVATION ].shape == ru_dict [RUTarget .TOTAL ].shape
72+
73+ assert len (ru_dict ) == len (ru_targets ), (f'Mismatch between the number of computed and requested metrics.'
74+ f'Requested { ru_targets } ' )
75+ return ru_dict
7276
7377 def get_quantization_candidates (self , mp_cfg ) \
74- -> Tuple [Dict [BaseNode , NodeActivationQuantizationConfig ], Dict [BaseNode , NodeWeightsQuantizationConfig ]]:
78+ -> Tuple [Dict [str , NodeActivationQuantizationConfig ], Dict [str , NodeWeightsQuantizationConfig ]]:
7579 """
7680 Retrieve quantization candidates objects for weights and activations from the configuration list.
7781
@@ -87,71 +91,3 @@ def get_quantization_candidates(self, mp_cfg) \
8791 act_qcs = {n .name : cfg .activation_quantization_cfg for n , cfg in node_qcs .items ()}
8892 w_qcs = {n .name : cfg .weights_quantization_cfg for n , cfg in node_qcs .items ()}
8993 return act_qcs , w_qcs
90-
91- def _weights_utilization (self , w_qcs : Optional [Dict [BaseNode , NodeWeightsQuantizationConfig ]]) -> Dict [BaseNode , float ]:
92- """
93- Compute weights utilization for configurable weights if configuration is passed,
94- or for non-configurable nodes otherwise.
95-
96- Args:
97- w_qcs: nodes quantization configuration to compute, or None.
98-
99- Returns:
100- Weight utilization per node.
101- """
102- if w_qcs :
103- target_criterion = TargetInclusionCriterion .QConfigurable
104- bitwidth_mode = BitwidthMode .QCustom
105- else :
106- target_criterion = TargetInclusionCriterion .QNonConfigurable
107- bitwidth_mode = BitwidthMode .QDefaultSP
108-
109- _ , nodes_util , _ = self .ru_calculator .compute_weights_utilization (target_criterion = target_criterion ,
110- bitwidth_mode = bitwidth_mode ,
111- w_qcs = w_qcs )
112- nodes_util = {n : u .bytes for n , u in nodes_util .items ()}
113- return nodes_util
114-
115- def _activation_utilization (self , act_qcs : Optional [Dict [BaseNode , NodeActivationQuantizationConfig ]]) \
116- -> Optional [Dict [Any , float ]]:
117- """
118- Compute activation utilization using MaxCut for all quantized nodes if configuration is passed.
119-
120- Args:
121- act_qcs: nodes activation configuration or None.
122-
123- Returns:
124- Activation utilization per cut, or empty dict if no configuration was passed.
125- """
126- # Maxcut activation utilization is computed for all quantized nodes, so non-configurable memory is already
127- # covered by the computation of configurable activations.
128- if not act_qcs :
129- return {}
130-
131- _ , cuts_util , * _ = self .ru_calculator .compute_activation_utilization_by_cut (
132- TargetInclusionCriterion .AnyQuantized , bitwidth_mode = BitwidthMode .QCustom , act_qcs = act_qcs )
133- cuts_util = {c : u .bytes for c , u in cuts_util .items ()}
134- return cuts_util
135-
136- def _bops_utilization (self ,
137- act_qcs : Optional [Dict [BaseNode , NodeActivationQuantizationConfig ]],
138- w_qcs : Optional [Dict [BaseNode , NodeWeightsQuantizationConfig ]]) -> np .ndarray :
139- """
140- Computes a resource utilization vector with the respective bit-operations (BOPS) count
141- according to the given mixed-precision configuration.
142-
143- Args:
144- act_qcs: nodes activation configuration or None.
145- w_qcs: nodes quantization configuration to compute, or None.
146- Either both are provided, or both are None.
147-
148- Returns:
149- A vector of node's BOPS count.
150- """
151- assert [act_qcs , w_qcs ].count (None ) in [0 , 2 ], 'act_qcs and w_qcs should both be provided or both be None.'
152- if act_qcs is None :
153- return np .array ([])
154-
155- _ , detailed_bops = self .ru_calculator .compute_bops (TargetInclusionCriterion .Any , BitwidthMode .QCustom ,
156- act_qcs = act_qcs , w_qcs = w_qcs )
157- return np .array (list (detailed_bops .values ()))
0 commit comments