8
8
import os
9
9
import shutil
10
10
import subprocess
11
+ from itertools import islice
11
12
from pathlib import Path
12
13
13
14
import executorch
24
25
from sklearn .metrics import accuracy_score
25
26
from timm .data import resolve_data_config
26
27
from timm .data .transforms_factory import create_transform
28
+ from torch .ao .quantization .quantize_pt2e import convert_pt2e
29
+ from torch .ao .quantization .quantize_pt2e import prepare_pt2e
27
30
from torch .export import export
28
31
from torch .export .exported_program import ExportedProgram
29
32
from torch .fx .passes .graph_drawer import FxGraphDrawer
@@ -54,8 +57,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod
54
57
55
58
if suite == "torchvision" :
56
59
transform = torchvision_models .get_model_weights (model_name ).DEFAULT .transforms ()
57
- else :
60
+ elif suite == "timm" :
58
61
transform = create_transform (** resolve_data_config (model .pretrained_cfg , model = model ))
62
+ else :
63
+ msg = f"Validation is not supported yet for the suite { suite } "
64
+ raise ValueError (msg )
59
65
60
66
val_dataset = datasets .ImageFolder (val_dir , transform = transform )
61
67
@@ -85,6 +91,76 @@ def dump_inputs(calibration_dataset, dest_path):
85
91
return input_files , targets
86
92
87
93
94
def quantize_model(
    captured_model: torch.fx.GraphModule, calibration_dataset: torch.utils.data.DataLoader, use_nncf: bool
) -> torch.fx.GraphModule:
    """Run post-training quantization on an exported model with the OpenVINO quantizer.

    Two flows are available:

    * ``use_nncf=True`` hands the model, the quantizer and the calibration data
      to ``quantize_pt2e`` in a single call.
    * ``use_nncf=False`` follows the ``prepare_pt2e`` -> calibrate ->
      ``convert_pt2e`` sequence, feeding calibration batches manually.

    :param captured_model: The FX graph module to quantize.
    :param calibration_dataset: Loader whose batches are used for calibration.
        Only element ``[0]`` of each batch is fed to the model
        (presumably the input tensor of an ``(input, target)`` pair).
    :param use_nncf: Selects the NNCF flow when true, the plain PT2E flow otherwise.
    :return: The quantized FX graph module (quantize ops are not folded).
    """
    ov_quantizer = OpenVINOQuantizer()

    print("PTQ: Quantize the model")
    # Aim for about 300 calibration samples; convert that into a number of
    # batches, rounding up so a partial last batch is still counted.
    target_samples = 300
    full_batches, leftover = divmod(target_samples, calibration_dataset.batch_size)
    num_batches = full_batches + (1 if leftover > 0 else 0)

    def first_item(batch):
        # Keep only the first element of the batch.
        return batch[0]

    if use_nncf:
        return quantize_pt2e(
            captured_model,
            ov_quantizer,
            subset_size=num_batches,
            calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=first_item),
            fold_quantize=False,
        )

    prepared_model = prepare_pt2e(captured_model, ov_quantizer)

    print("PTQ: Calibrate the model...")
    # islice stops after num_batches without materializing the whole loader.
    for batch in islice(calibration_dataset, num_batches):
        prepared_model(first_item(batch))

    print("PTQ: Convert the quantized model...")
    return convert_pt2e(prepared_model, fold_quantize=False)
127
+
128
+
129
def validate_model(model_file_name: str, calibration_dataset: torch.utils.data.DataLoader) -> float:
    """Measure top-1 accuracy of an exported model using ``openvino_executor_runner``.

    The dataset inputs are dumped to ``tmp_inputs``, the external runner is
    invoked on them, and the raw outputs it writes to ``tmp_outputs`` are read
    back and compared against the dataset targets. Both directories are wiped
    and recreated on every call.

    :param model_file_name: Path of the model file passed to the runner.
    :param calibration_dataset: Loader providing the inputs and targets.
    :return: The accuracy score of the predictions against the targets.
    :raises subprocess.CalledProcessError: If the runner exits with a non-zero
        status. Without this check a failed run would only surface later as a
        missing output file.
    """
    # 1: Dump inputs
    dest_path = Path("tmp_inputs")
    out_path = Path("tmp_outputs")
    for directory in (dest_path, out_path):
        # Start from empty directories so stale files from a previous run
        # cannot be mistaken for fresh outputs.
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory)

    input_files, targets = dump_inputs(calibration_dataset, dest_path)
    inp_list_file = dest_path / "in_list.txt"
    with open(inp_list_file, "w") as f:
        f.write("\n".join(input_files) + "\n")

    # 2: Run the executor
    print("Run openvino_executor_runner...")

    # NOTE: the runner path is relative to the current working directory.
    subprocess.run(
        [
            "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner",
            f"--model_path={model_file_name}",
            f"--input_list_path={inp_list_file}",
            f"--output_folder_path={out_path}",
        ],
        check=True,
    )

    # 3: load the outputs and compare with the targets
    predictions = []
    for index in range(len(input_files)):
        # One raw float32 file is read per input file; it is reshaped to rows
        # of 1000 values and the arg-max of each row is the predicted class.
        raw_output = np.fromfile(out_path / f"output_{index}_0.raw", dtype=np.float32)
        predictions.extend(torch.tensor(raw_output).reshape(-1, 1000).argmax(-1))

    # accuracy_score expects (y_true, y_pred).
    return accuracy_score(targets, predictions)
162
+
163
+
88
164
def main (
89
165
suite : str ,
90
166
model_name : str ,
@@ -94,6 +170,7 @@ def main(
94
170
dataset_path : str ,
95
171
device : str ,
96
172
batch_size : int ,
173
+ quantization_flow : str ,
97
174
):
98
175
# Load the selected model
99
176
model = load_model (suite , model_name )
@@ -104,7 +181,7 @@ def main(
104
181
input_shape = tuple (next (iter (calibration_dataset ))[0 ].shape )
105
182
print (f"Input shape retrieved from the model config: { input_shape } " )
106
183
# Ensure input_shape is a tuple
107
- elif isinstance (input_shape , list ):
184
+ elif isinstance (input_shape , ( list , tuple ) ):
108
185
input_shape = tuple (input_shape )
109
186
else :
110
187
msg = "Input shape must be a list or tuple."
@@ -124,23 +201,8 @@ def main(
124
201
if not dataset_path :
125
202
msg = "Quantization requires a calibration dataset."
126
203
raise ValueError (msg )
127
-
128
- captured_model = aten_dialect .module ()
129
- quantizer = OpenVINOQuantizer ()
130
-
131
- print ("PTQ: Quantize the model" )
132
-
133
- def transform (x ):
134
- return x [0 ]
135
-
136
- default_subset_size = 300
137
- batch_size = calibration_dataset .batch_size
138
- quantized_model = quantize_pt2e (
139
- captured_model ,
140
- quantizer ,
141
- subset_size = (default_subset_size // batch_size ) + int (default_subset_size % batch_size > 0 ),
142
- calibration_dataset = nncf .Dataset (calibration_dataset , transform_func = transform ),
143
- fold_quantize = False ,
204
+ quantized_model = quantize_model (
205
+ aten_dialect .module (), calibration_dataset , use_nncf = quantization_flow == "nncf"
144
206
)
145
207
visualize_fx_model (quantized_model , f"{ model_name } _int8.svg" )
146
208
@@ -172,39 +234,8 @@ def transform(x):
172
234
msg = "Validateion requires a calibration dataset."
173
235
raise ValueError (msg )
174
236
175
- print ("Start validation of the quantized model:" )
176
- # 1: Dump inputs
177
- dest_path = Path ("tmp_inputs" )
178
- out_path = Path ("tmp_outputs" )
179
- for d in [dest_path , out_path ]:
180
- if os .path .exists (d ):
181
- shutil .rmtree (d )
182
- os .makedirs (d )
183
-
184
- input_files , targets = dump_inputs (calibration_dataset , dest_path )
185
- inp_list_file = dest_path / "in_list.txt"
186
- with open (inp_list_file , "w" ) as f :
187
- f .write ("\n " .join (input_files ) + "\n " )
188
-
189
- # 2: Run the executor
190
- print ("Run openvino_executor_runner..." )
191
-
192
- subprocess .run (
193
- [
194
- "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner" ,
195
- f"--model_path={ model_file_name } " ,
196
- f"--input_list_path={ inp_list_file } " ,
197
- f"--output_folder_path={ out_path } " ,
198
- ]
199
- )
200
-
201
- # 3: load the outputs and compare with the targets
202
- predictions = []
203
- for i in range (len (input_files )):
204
- tensor = np .fromfile (out_path / f"output_{ i } _0.raw" , dtype = np .float32 )
205
- predictions .extend (torch .tensor (tensor ).reshape (- 1 , 1000 ).argmax (- 1 ))
206
-
207
- acc_top1 = accuracy_score (predictions , targets )
237
+ print ("Start validation of the model:" )
238
+ acc_top1 = validate_model (model_file_name , calibration_dataset )
208
239
print (f"acc@1: { acc_top1 } " )
209
240
210
241
@@ -244,10 +275,20 @@ def transform(x):
244
275
default = "CPU" ,
245
276
help = "Target device for compiling the model (e.g., CPU, GPU). Default is CPU." ,
246
277
)
278
+ parser .add_argument (
279
+ "--quantization_flow" ,
280
+ type = str ,
281
+ choices = ["pt2e" , "nncf" ],
282
+ default = "nncf" ,
283
+ help = "Select the quantization flow (nncf or pt2e):"
284
+ " pt2e is the default torch.ao quantization flow, while"
285
+ " nncf is a custom method with additional algorithms to improve model performance." ,
286
+ )
247
287
248
288
args = parser .parse_args ()
249
289
250
290
# Run the main function with parsed arguments
291
+ # Disable nncf patching as export of the patched model is not supported.
251
292
with nncf .torch .disable_patching ():
252
293
main (
253
294
args .suite ,
@@ -258,4 +299,5 @@ def transform(x):
258
299
args .dataset ,
259
300
args .device ,
260
301
args .batch_size ,
302
+ args .quantization_flow ,
261
303
)
0 commit comments