 from torch.ao.quantization.quantizer.quantizer import SharedQuantizationSpec
 
 import nncf
-import nncf.common.quantization as q
+import nncf.common.quantization as quantization
 import nncf.experimental.torch.fx as nncf_fx
 from nncf.common.graph.graph import NNCFGraph
 
@@ -50,7 +50,6 @@ def __init__(
         self,
         *,
         mode: Optional[QuantizationMode] = QuantizationMode.INT8_SYM,
-        ignored_scope: Optional[nncf.IgnoredScope] = None,
         **kwargs,
     ):
         """
@@ -59,26 +58,53 @@ def __init__(
             - INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
             - INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models
             Default value is INT8_SYM.
-        :param ignored_scope: An ignored scope that defined the list of model control
-            flow graph nodes to be ignored during quantization.
         :param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm.
         """
         if mode == QuantizationMode.INT8_SYM:
-            preset = q.structs.QuantizationPreset.PERFORMANCE
+            preset = quantization.structs.QuantizationPreset.PERFORMANCE
             model_type = None
         elif mode == QuantizationMode.INT8_MIXED:
-            preset = q.structs.QuantizationPreset.MIXED
+            preset = quantization.structs.QuantizationPreset.MIXED
             model_type = None
         else:
             preset = None
             model_type = nncf.parameters.ModelType.TRANSFORMER
         self._min_max_algo = nncf.quantization.algorithms.min_max.algorithm.MinMaxQuantization(
-            preset=preset, model_type=model_type, ignored_scope=ignored_scope, **kwargs
+            preset=preset, model_type=model_type, **kwargs
+        )
+
+    def set_ignored_scope(
+        self,
+        names: Optional[List[str]] = None,
+        patterns: Optional[List[str]] = None,
+        types: Optional[List[str]] = None,
+        subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None,
+        validate: bool = True,
+    ) -> None:
84+ """
85+ Provides an option to specify portions of model to be excluded from compression.
86+ The ignored scope defines model sub-graphs that should be excluded from the quantization process.
87+
88+ :param names: List of ignored node names.
89+ :param patterns: List of regular expressions that define patterns for names of ignored nodes.
90+ :param types: List of ignored operation types.
91+ :param subgraphs: List of ignored subgraphs.
92+ :param validate: If set to True, then a RuntimeError will be raised if any ignored scope does not match
93+ in the model graph.
94+ """
+        self._min_max_algo.set_ignored_scope(
+            nncf.IgnoredScope(
+                names=names or [],
+                patterns=patterns or [],
+                types=types or [],
+                subgraphs=subgraphs or [],
+                validate=validate,
+            )
         )
 
     def get_nncf_quantization_setup(
         self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph
-    ) -> q.quantizer_setup.SingleConfigQuantizerSetup:
+    ) -> quantization.quantizer_setup.SingleConfigQuantizerSetup:
         self._min_max_algo._set_backend_entity(model)
         return self._min_max_algo.find_quantization_setup(model, nncf_graph)
 
@@ -134,7 +160,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
 
     @staticmethod
     def _get_unified_scales_root_quantizer_id(
-        nncf_graph: NNCFGraph, quantizer_ids: List[int], quantizer_setup: q.quantizer_setup.SingleConfigQuantizerSetup
+        nncf_graph: NNCFGraph,
+        quantizer_ids: List[int],
+        quantizer_setup: quantization.quantizer_setup.SingleConfigQuantizerSetup,
     ) -> int:
         """
         Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id`
@@ -160,7 +188,7 @@ def _get_unified_scales_root_quantizer_id(
     def _get_edge_or_node_and_annotation(
         graph: torch.fx.Graph,
         nncf_graph: NNCFGraph,
-        qp: q.quantizer_setup.QuantizationPointBase,
+        qp: quantization.quantizer_setup.QuantizationPointBase,
         node_vs_torch_annotation: Dict[torch.fx.Node, QuantizationAnnotation],
     ) -> Tuple[EdgeOrNode, QuantizationAnnotation]:
         """
@@ -181,7 +209,7 @@ def _get_edge_or_node_and_annotation(
 
     @staticmethod
     def _get_edge_or_node(
-        target_node: torch.fx.Node, qp: q.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph
+        target_node: torch.fx.Node, qp: quantization.quantizer_setup.QuantizationPointBase, nncf_graph: NNCFGraph
     ) -> EdgeOrNode:
         """
         Returns the edge or node based on the given target node and quantization point.
@@ -231,7 +259,7 @@ def _fill_torch_ao_annotation(
             annotation_to_update.input_qspec_map[edge_or_node[0]] = qspec
 
     @staticmethod
-    def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) -> QuantizationSpec:
+    def _get_torch_ao_qspec_from_qp(qp: quantization.quantizer_setup.QuantizationPointBase) -> QuantizationSpec:
         """
         Retrieves the quantization configuration from the given quantization point and
         converts it into a QuantizationSpec.
@@ -247,13 +275,13 @@ def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) ->
         if qconfig.per_channel:
             torch_qscheme = (
                 torch.per_channel_symmetric
-                if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC
+                if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC
                 else torch.per_channel_affine
             )
         else:
             torch_qscheme = (
                 torch.per_tensor_symmetric
-                if qconfig.mode is q.structs.QuantizationScheme.SYMMETRIC
+                if qconfig.mode is quantization.structs.QuantizationScheme.SYMMETRIC
                 else torch.per_tensor_affine
             )
         if is_weight:
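
A minimal usage sketch of the reworked API introduced by this diff: the ignored scope is no longer passed to the constructor but is configured afterwards via `set_ignored_scope()`. The diff does not show the enclosing class, so the `OpenVINOQuantizer` import below, the `QuantizationMode` import location, and the node names/types are assumptions for illustration only.

    # Assumed exports; the diff only shows the patched methods, not the class or module layout.
    from nncf.experimental.torch.fx import OpenVINOQuantizer, QuantizationMode

    # Choose a quantization mode; per the __init__ above, INT8_SYM maps to the
    # PERFORMANCE preset while INT8_TRANSFORMER switches to the TRANSFORMER model type.
    quantizer = OpenVINOQuantizer(mode=QuantizationMode.INT8_SYM)

    # Exclude parts of the model from quantization. This replaces the removed
    # `ignored_scope` constructor argument; the entries below are illustrative.
    quantizer.set_ignored_scope(
        names=["conv2d_3"],
        types=["mul"],
        patterns=[".*attention.*"],
        validate=False,
    )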