88import os
99import shutil
1010import subprocess
11+ from itertools import islice
1112from pathlib import Path
1213
1314import executorch
2425from sklearn .metrics import accuracy_score
2526from timm .data import resolve_data_config
2627from timm .data .transforms_factory import create_transform
28+ from torch .ao .quantization .quantize_pt2e import convert_pt2e
29+ from torch .ao .quantization .quantize_pt2e import prepare_pt2e
2730from torch .export import export
2831from torch .export .exported_program import ExportedProgram
2932from torch .fx .passes .graph_drawer import FxGraphDrawer
@@ -54,8 +57,11 @@ def load_calibration_dataset(dataset_path: str, batch_size: int, suite: str, mod
5457
5558 if suite == "torchvision" :
5659 transform = torchvision_models .get_model_weights (model_name ).DEFAULT .transforms ()
57- else :
60+ elif suite == "timm" :
5861 transform = create_transform (** resolve_data_config (model .pretrained_cfg , model = model ))
62+ else :
63+ msg = f"Validation is not supported yet for the suite { suite } "
64+ raise ValueError (msg )
5965
6066 val_dataset = datasets .ImageFolder (val_dir , transform = transform )
6167
@@ -85,6 +91,76 @@ def dump_inputs(calibration_dataset, dest_path):
8591 return input_files , targets
8692
8793
def quantize_model(
    captured_model: torch.fx.GraphModule,
    calibration_dataset: torch.utils.data.DataLoader,
    use_nncf: bool,
    calibration_samples: int = 300,
) -> torch.fx.GraphModule:
    """Apply post-training quantization to a captured model with OpenVINOQuantizer.

    Args:
        captured_model: Graph module to quantize.
        calibration_dataset: Loader yielding items whose first element is the
            model input (the remaining elements are ignored during calibration).
        use_nncf: If True, quantize through ``quantize_pt2e`` fed by an
            ``nncf.Dataset``; otherwise run the ``prepare_pt2e`` -> calibrate ->
            ``convert_pt2e`` sequence.
        calibration_samples: Approximate number of samples to calibrate on.
            It is converted to a number of batches using the loader's batch size.

    Returns:
        The quantized graph module (produced with ``fold_quantize=False``).
    """
    quantizer = OpenVINOQuantizer()

    print("PTQ: Quantize the model")
    # DataLoader.batch_size is None when automatic batching is disabled or a
    # batch sampler is used; count every yielded item as one sample in that case
    # instead of failing on ``int // None``.
    batch_size = calibration_dataset.batch_size or 1
    # Ceiling division: number of batches needed to cover calibration_samples.
    subset_size = (calibration_samples + batch_size - 1) // batch_size

    def transform(x):
        # Keep only the model input from an (input, target, ...) item.
        return x[0]

    if use_nncf:
        return quantize_pt2e(
            captured_model,
            quantizer,
            subset_size=subset_size,
            calibration_dataset=nncf.Dataset(calibration_dataset, transform_func=transform),
            fold_quantize=False,
        )

    annotated_model = prepare_pt2e(captured_model, quantizer)

    print("PTQ: Calibrate the model...")
    # Calibration only needs forward passes to collect statistics, so skip
    # autograd bookkeeping.
    with torch.no_grad():
        for data in islice(calibration_dataset, subset_size):
            annotated_model(transform(data))

    print("PTQ: Convert the quantized model...")
    return convert_pt2e(annotated_model, fold_quantize=False)
127+
128+
def validate_model(
    model_file_name: str,
    calibration_dataset: torch.utils.data.DataLoader,
    num_classes: int = 1000,
) -> float:
    """Run a model file through openvino_executor_runner and return top-1 accuracy.

    The inputs from ``calibration_dataset`` are dumped into ``tmp_inputs``, the
    runner writes its results into ``tmp_outputs`` (both directories are
    recreated from scratch), and the outputs are compared with the targets.

    Args:
        model_file_name: Path of the model file passed to the runner.
        calibration_dataset: Loader whose inputs and targets are used for validation.
        num_classes: Number of scores per sample in each float32 output file.

    Returns:
        The accuracy computed by ``accuracy_score`` over all predictions.

    Raises:
        subprocess.CalledProcessError: If the runner exits with a non-zero status.
    """
    # 1: Dump inputs
    dest_path = Path("tmp_inputs")
    out_path = Path("tmp_outputs")
    for d in (dest_path, out_path):
        # Start from empty directories so stale files never leak into a run.
        if d.exists():
            shutil.rmtree(d)
        d.mkdir(parents=True)

    input_files, targets = dump_inputs(calibration_dataset, dest_path)
    inp_list_file = dest_path / "in_list.txt"
    with open(inp_list_file, "w") as f:
        f.write("\n".join(input_files) + "\n")

    # 2: Run the executor
    print("Run openvino_executor_runner...")

    # check=True surfaces a failed run here instead of as a confusing
    # missing-output-file error in step 3.
    subprocess.run(
        [
            "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner",
            f"--model_path={model_file_name}",
            f"--input_list_path={inp_list_file}",
            f"--output_folder_path={out_path}",
        ],
        check=True,
    )

    # 3: load the outputs and compare with the targets
    predictions = []
    for i in range(len(input_files)):
        # One output file is read per dumped input file.
        tensor = np.fromfile(out_path / f"output_{i}_0.raw", dtype=np.float32)
        predictions.extend(torch.tensor(tensor).reshape(-1, num_classes).argmax(-1))

    # accuracy_score takes (y_true, y_pred); the score itself is symmetric.
    return accuracy_score(targets, predictions)
162+
163+
88164def main (
89165 suite : str ,
90166 model_name : str ,
@@ -94,6 +170,7 @@ def main(
94170 dataset_path : str ,
95171 device : str ,
96172 batch_size : int ,
173+ quantization_flow : str ,
97174):
98175 # Load the selected model
99176 model = load_model (suite , model_name )
@@ -104,7 +181,7 @@ def main(
104181 input_shape = tuple (next (iter (calibration_dataset ))[0 ].shape )
105182 print (f"Input shape retrieved from the model config: { input_shape } " )
106183 # Ensure input_shape is a tuple
107- elif isinstance (input_shape , list ):
184+ elif isinstance (input_shape , ( list , tuple ) ):
108185 input_shape = tuple (input_shape )
109186 else :
110187 msg = "Input shape must be a list or tuple."
@@ -124,23 +201,8 @@ def main(
124201 if not dataset_path :
125202 msg = "Quantization requires a calibration dataset."
126203 raise ValueError (msg )
127-
128- captured_model = aten_dialect .module ()
129- quantizer = OpenVINOQuantizer ()
130-
131- print ("PTQ: Quantize the model" )
132-
133- def transform (x ):
134- return x [0 ]
135-
136- default_subset_size = 300
137- batch_size = calibration_dataset .batch_size
138- quantized_model = quantize_pt2e (
139- captured_model ,
140- quantizer ,
141- subset_size = (default_subset_size // batch_size ) + int (default_subset_size % batch_size > 0 ),
142- calibration_dataset = nncf .Dataset (calibration_dataset , transform_func = transform ),
143- fold_quantize = False ,
204+ quantized_model = quantize_model (
205+ aten_dialect .module (), calibration_dataset , use_nncf = quantization_flow == "nncf"
144206 )
145207 visualize_fx_model (quantized_model , f"{ model_name } _int8.svg" )
146208
@@ -172,39 +234,8 @@ def transform(x):
172234 msg = "Validateion requires a calibration dataset."
173235 raise ValueError (msg )
174236
175- print ("Start validation of the quantized model:" )
176- # 1: Dump inputs
177- dest_path = Path ("tmp_inputs" )
178- out_path = Path ("tmp_outputs" )
179- for d in [dest_path , out_path ]:
180- if os .path .exists (d ):
181- shutil .rmtree (d )
182- os .makedirs (d )
183-
184- input_files , targets = dump_inputs (calibration_dataset , dest_path )
185- inp_list_file = dest_path / "in_list.txt"
186- with open (inp_list_file , "w" ) as f :
187- f .write ("\n " .join (input_files ) + "\n " )
188-
189- # 2: Run the executor
190- print ("Run openvino_executor_runner..." )
191-
192- subprocess .run (
193- [
194- "../../../cmake-openvino-out/examples/openvino/openvino_executor_runner" ,
195- f"--model_path={ model_file_name } " ,
196- f"--input_list_path={ inp_list_file } " ,
197- f"--output_folder_path={ out_path } " ,
198- ]
199- )
200-
201- # 3: load the outputs and compare with the targets
202- predictions = []
203- for i in range (len (input_files )):
204- tensor = np .fromfile (out_path / f"output_{ i } _0.raw" , dtype = np .float32 )
205- predictions .extend (torch .tensor (tensor ).reshape (- 1 , 1000 ).argmax (- 1 ))
206-
207- acc_top1 = accuracy_score (predictions , targets )
237+ print ("Start validation of the model:" )
238+ acc_top1 = validate_model (model_file_name , calibration_dataset )
208239 print (f"acc@1: { acc_top1 } " )
209240
210241
@@ -244,10 +275,20 @@ def transform(x):
244275 default = "CPU" ,
245276 help = "Target device for compiling the model (e.g., CPU, GPU). Default is CPU." ,
246277 )
278+ parser .add_argument (
279+ "--quantization_flow" ,
280+ type = str ,
281+ choices = ["pt2e" , "nncf" ],
282+ default = "nncf" ,
283+ help = "Select the quantization flow (nncf or pt2e):"
284+ " pt2e is the default torch.ao quantization flow, while"
285+ " nncf is a custom method with additional algorithms to improve model performance." ,
286+ )
247287
248288 args = parser .parse_args ()
249289
250290 # Run the main function with parsed arguments
291+ # Disable nncf patching as export of the patched model is not supported.
251292 with nncf .torch .disable_patching ():
252293 main (
253294 args .suite ,
@@ -258,4 +299,5 @@ def transform(x):
258299 args .dataset ,
259300 args .device ,
260301 args .batch_size ,
302+ args .quantization_flow ,
261303 )
0 commit comments