19
19
from torch .ao .quantization .quantizer .quantizer import SharedQuantizationSpec
20
20
21
21
import nncf
22
- import nncf .common .quantization as q
22
+ import nncf .common .quantization as quantization
23
23
import nncf .experimental .torch .fx as nncf_fx
24
24
from nncf .common .graph .graph import NNCFGraph
25
25
@@ -50,7 +50,6 @@ def __init__(
50
50
self ,
51
51
* ,
52
52
mode : Optional [QuantizationMode ] = QuantizationMode .INT8_SYM ,
53
- ignored_scope : Optional [nncf .IgnoredScope ] = None ,
54
53
** kwargs ,
55
54
):
56
55
"""
@@ -59,26 +58,53 @@ def __init__(
59
58
- INT8_MIXED: INT8 asymmetric quantization for activations, symmetric for weights.
60
59
- INT8_TRANSFORMER: Optimized INT8 quantization for transformer-based models
61
60
Default value is INT8_SYM.
62
- :param ignored_scope: An ignored scope that defined the list of model control
63
- flow graph nodes to be ignored during quantization.
64
61
:param kwargs: Arguments to pass to the NNCF MinMaxQuantization algorithm.
65
62
"""
66
63
if mode == QuantizationMode .INT8_SYM :
67
- preset = q .structs .QuantizationPreset .PERFORMANCE
64
+ preset = quantization .structs .QuantizationPreset .PERFORMANCE
68
65
model_type = None
69
66
elif mode == QuantizationMode .INT8_MIXED :
70
- preset = q .structs .QuantizationPreset .MIXED
67
+ preset = quantization .structs .QuantizationPreset .MIXED
71
68
model_type = None
72
69
else :
73
70
preset = None
74
71
model_type = nncf .parameters .ModelType .TRANSFORMER
75
72
self ._min_max_algo = nncf .quantization .algorithms .min_max .algorithm .MinMaxQuantization (
76
- preset = preset , model_type = model_type , ignored_scope = ignored_scope , ** kwargs
73
+ preset = preset , model_type = model_type , ** kwargs
74
+ )
75
+
76
def set_ignored_scope(
    self,
    names: Optional[List[str]] = None,
    patterns: Optional[List[str]] = None,
    types: Optional[List[str]] = None,
    subgraphs: Optional[List[Tuple[List[str], List[str]]]] = None,
    validate: bool = True,
) -> None:
    """
    Excludes selected parts of the model from quantization.

    The arguments are packed into an `nncf.IgnoredScope` and handed over to the
    underlying MinMax quantization algorithm. Any selector that is omitted
    (or otherwise falsy) is passed on as an empty list.

    :param names: List of ignored node names.
    :param patterns: List of regular expressions matched against names of nodes to ignore.
    :param types: List of ignored operation types.
    :param subgraphs: List of ignored subgraphs.
    :param validate: If set to True, then a RuntimeError will be raised if any ignored scope
        does not match in the model graph.
    """
    # Build the scope first so the forwarding call below stays a one-liner.
    ignored_scope = nncf.IgnoredScope(
        names=names or [],
        patterns=patterns or [],
        types=types or [],
        subgraphs=subgraphs or [],
        validate=validate,
    )
    self._min_max_algo.set_ignored_scope(ignored_scope)
78
104
79
105
def get_nncf_quantization_setup(
    self, model: torch.fx.GraphModule, nncf_graph: NNCFGraph
) -> quantization.quantizer_setup.SingleConfigQuantizerSetup:
    """
    Returns the quantizer setup that the MinMax algorithm finds for the given model.

    :param model: The torch.fx model passed to the algorithm.
    :param nncf_graph: The NNCFGraph passed to the algorithm alongside the model.
    :return: The result of the algorithm's `find_quantization_setup` call.
    """
    min_max_algo = self._min_max_algo
    # The backend entity is set from the model before the setup search is invoked;
    # the order of these two calls is kept as is.
    min_max_algo._set_backend_entity(model)
    quantizer_setup = min_max_algo.find_quantization_setup(model, nncf_graph)
    return quantizer_setup
84
110
@@ -134,7 +160,9 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
134
160
135
161
@staticmethod
136
162
def _get_unified_scales_root_quantizer_id (
137
- nncf_graph : NNCFGraph , quantizer_ids : List [int ], quantizer_setup : q .quantizer_setup .SingleConfigQuantizerSetup
163
+ nncf_graph : NNCFGraph ,
164
+ quantizer_ids : List [int ],
165
+ quantizer_setup : quantization .quantizer_setup .SingleConfigQuantizerSetup ,
138
166
) -> int :
139
167
"""
140
168
Identifies the earliest quantizer node ID based on the corresponding `nncf_node.node_id`
@@ -160,7 +188,7 @@ def _get_unified_scales_root_quantizer_id(
160
188
def _get_edge_or_node_and_annotation (
161
189
graph : torch .fx .Graph ,
162
190
nncf_graph : NNCFGraph ,
163
- qp : q .quantizer_setup .QuantizationPointBase ,
191
+ qp : quantization .quantizer_setup .QuantizationPointBase ,
164
192
node_vs_torch_annotation : Dict [torch .fx .Node , QuantizationAnnotation ],
165
193
) -> Tuple [EdgeOrNode , QuantizationAnnotation ]:
166
194
"""
@@ -181,7 +209,7 @@ def _get_edge_or_node_and_annotation(
181
209
182
210
@staticmethod
183
211
def _get_edge_or_node (
184
- target_node : torch .fx .Node , qp : q .quantizer_setup .QuantizationPointBase , nncf_graph : NNCFGraph
212
+ target_node : torch .fx .Node , qp : quantization .quantizer_setup .QuantizationPointBase , nncf_graph : NNCFGraph
185
213
) -> EdgeOrNode :
186
214
"""
187
215
Returns the edge or node based on the given target node and quantization point.
@@ -231,7 +259,7 @@ def _fill_torch_ao_annotation(
231
259
annotation_to_update .input_qspec_map [edge_or_node [0 ]] = qspec
232
260
233
261
@staticmethod
234
- def _get_torch_ao_qspec_from_qp (qp : q .quantizer_setup .QuantizationPointBase ) -> QuantizationSpec :
262
+ def _get_torch_ao_qspec_from_qp (qp : quantization .quantizer_setup .QuantizationPointBase ) -> QuantizationSpec :
235
263
"""
236
264
Retrieves the quantization configuration from the given quantization point and
237
265
converts it into a QuantizationSpec.
@@ -247,13 +275,13 @@ def _get_torch_ao_qspec_from_qp(qp: q.quantizer_setup.QuantizationPointBase) ->
247
275
if qconfig .per_channel :
248
276
torch_qscheme = (
249
277
torch .per_channel_symmetric
250
- if qconfig .mode is q .structs .QuantizationScheme .SYMMETRIC
278
+ if qconfig .mode is quantization .structs .QuantizationScheme .SYMMETRIC
251
279
else torch .per_channel_affine
252
280
)
253
281
else :
254
282
torch_qscheme = (
255
283
torch .per_tensor_symmetric
256
- if qconfig .mode is q .structs .QuantizationScheme .SYMMETRIC
284
+ if qconfig .mode is quantization .structs .QuantizationScheme .SYMMETRIC
257
285
else torch .per_tensor_affine
258
286
)
259
287
if is_weight :
0 commit comments