 import time
 from copy import deepcopy
 from pathlib import Path
-from typing import Any, Dict, Tuple
+from typing import Dict, Tuple
 
 import numpy as np
 import openvino as ov
 from torch.fx.passes.graph_drawer import FxGraphDrawer
 from tqdm import tqdm
 from ultralytics.cfg import get_cfg
-from ultralytics.data.converter import coco80_to_coco91_class
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.engine.validator import BaseValidator as Validator
 from ultralytics.models.yolo import YOLO
-from ultralytics.utils import DATASETS_DIR
 from ultralytics.utils import DEFAULT_CFG
-from ultralytics.utils.metrics import ConfusionMatrix
 from ultralytics.utils.torch_utils import de_parallel
 
 import nncf
@@ -55,15 +52,18 @@ def measure_time(model, example_inputs, num_iters=500):
     return average_time
 
 
-def validate_fx_ult_method(model: ov.Model) -> Tuple[Dict, int, int]:
-    """
-    Uses the ultralytics .val method instead of a dataloader loop.
-    For some reason this shows better metrics on torch.compile'd models.
-    """
-    yolo = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
-    yolo.model = model
-    result = yolo.val(data="coco128.yaml", batch=1, rect=False)
-    return result.results_dict
+def measure_time_ov(model, example_inputs, num_iters=1000):
+    ie = ov.Core()
+    compiled_model = ie.compile_model(model, "CPU")
+    infer_request = compiled_model.create_infer_request()
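+    # Warm-up run: the first inference triggers one-time initialization and is excluded from timing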
+    infer_request.infer(example_inputs)
+    total_time = 0
+    for i in range(0, num_iters):
+        start_time = time.time()
+        infer_request.infer(example_inputs)
+        total_time += time.time() - start_time
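+    # Mean per-inference latency, converted from seconds to milliseconds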
+    average_time = (total_time / num_iters) * 1000
+    return average_time
 
 
 def validate_fx(
@@ -100,10 +100,10 @@ def print_statistics_short(stats: np.ndarray) -> None:
 def validate_ov(
     model: ov.Model, data_loader: torch.utils.data.DataLoader, validator: Validator, num_samples: int = None
 ) -> Tuple[Dict, int, int]:
-    validator.seen = 0
-    validator.jdict = []
-    validator.stats = []
-    validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
+    # validator.seen = 0
+    # validator.jdict = []
+    # validator.stats = []
+    # validator.confusion_matrix = ConfusionMatrix(nc=validator.nc)
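+    # NOTE: with the reset commented out, validator counters and stats carry over between calls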
     model.reshape({0: [1, 3, -1, -1]})
     compiled_model = ov.compile_model(model)
     output_layer = compiled_model.output(0)
@@ -131,7 +131,7 @@ def print_statistics(stats: np.ndarray, total_images: int, total_objects: int) -
     print(pf % ("all", total_images, total_objects, mp, mr, map50, mean_ap))
 
 
-def prepare_validation_new(model: YOLO, data: str) -> Tuple[Validator, torch.utils.data.DataLoader]:
+def prepare_validation(model: YOLO, data: str) -> Tuple[Validator, torch.utils.data.DataLoader]:
     # custom = {"rect": True, "batch": 1} # method defaults
     # rect: False forces all input images to be resized to a single size
     custom = {"rect": False, "batch": 1}  # method defaults
@@ -148,25 +148,6 @@ def prepare_validation_new(model: YOLO, data: str) -> Tuple[Validator, torch.uti
     return validator, data_loader
 
 
-def prepare_validation(model: YOLO, args: Any) -> Tuple[Validator, torch.utils.data.DataLoader]:
-    validator = model.smart_load("validator")(args)
-    validator.data = check_det_dataset(args.data)
-    dataset = validator.data["val"]
-    print(f"{dataset}")
-
-    data_loader = validator.get_dataloader(f"{DATASETS_DIR}/coco128", 1)
-
-    validator = model.smart_load("validator")(args)
-
-    validator.is_coco = True
-    validator.class_map = coco80_to_coco91_class()
-    validator.names = model.model.names
-    validator.metrics.names = validator.names
-    validator.nc = model.model.model[-1].nc
-
-    return validator, data_loader
-
-
 def benchmark_performance(model_path, config) -> float:
     command = f"benchmark_app -m {model_path} -d CPU -api async -t 30"
     command += f' -shape "[1,3,{config.imgsz},{config.imgsz}]"'
@@ -221,7 +202,7 @@ def transform_fn(data_item: Dict):
     return quantized_model
 
 
-NNCF_QUANTIZATION = True
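+# Toggles the path exercised in main(): OpenVINO-backed torch.compile when True, the default backend when False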
+NNCF_QUANTIZATION = False
 
 
 def quantize_impl(exported_model, val_loader, validator):
@@ -290,26 +271,25 @@ def main():
     # args.data = "coco128.yaml"
     # Prepare validation dataset and helper
 
-    validator, data_loader = prepare_validation_new(model, "coco128.yaml")
+    validator, data_loader = prepare_validation(model, "coco128.yaml")
 
     # Convert to OpenVINO model
-    if TORCH_FX:
-        batch = next(iter(data_loader))
-        batch = validator.preprocess(batch)
+    batch = next(iter(data_loader))
+    batch = validator.preprocess(batch)
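+    # The batch is now prepared unconditionally; it is reused for export and OV latency measurement below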
 
+    if TORCH_FX:
         fp_stats, total_images, total_objects = validate_fx(model.model, tqdm(data_loader), validator)
         print("Floating-point Torch model validation results:")
         print_statistics(fp_stats, total_images, total_objects)
 
-        fp32_compiled_model = torch.compile(model.model, backend="openvino")
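+        # Pick the torch.compile backend: OpenVINO on the NNCF path, the default backend otherwise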
+        if NNCF_QUANTIZATION:
+            fp32_compiled_model = torch.compile(model.model, backend="openvino")
+        else:
+            fp32_compiled_model = torch.compile(model.model)
         fp32_stats, total_images, total_objects = validate_fx(fp32_compiled_model, tqdm(data_loader), validator)
         print("FP32 FX model validation results:")
         print_statistics(fp32_stats, total_images, total_objects)
 
-        # result = validate_fx_ult_method(fp32_compiled_model)
-        # print("FX FP32 model .val validation")
-        # print_statistics_short(result)
-
         print("Start quantization...")
         # Rebuild model to reset ultralytics cache
         model = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
@@ -323,10 +303,6 @@ def main():
         )
         quantized_model = quantize_impl(deepcopy(exported_model), data_loader, validator)
 
-        # result = validate_fx_ult_method(quantized_model)
-        # print("FX INT8 model .val validation")
-        # print_statistics_short(result)
-
         int8_stats, total_images, total_objects = validate_fx(quantized_model, tqdm(data_loader), validator)
         print("INT8 FX model validation results:")
         print_statistics(int8_stats, total_images, total_objects)
@@ -360,35 +336,52 @@ def main():
     print("Quantized model validation results:")
     print_statistics(q_stats, total_images, total_objects)
 
-    # Benchmark performance of FP32 model
-    fp_model_perf = benchmark_performance(ov_model_path, args)
-    print(f"Floating-point model performance: {fp_model_perf} FPS")
-
-    # Benchmark performance of quantized model
-    quantized_model_perf = benchmark_performance(quantized_model_path, args)
-    print(f"Quantized model performance: {quantized_model_perf} FPS")
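+    # Ad-hoc switches for the two measurements; -1 marks "not measured" in the returned perf values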
+    fps = True
+    latency = True
+    fp_model_perf = -1
+    quantized_model_perf = -1
+    if fps:
+        # Benchmark performance of FP32 model
+        fp_model_perf = benchmark_performance(ov_model_path, args)
+        print(f"Floating-point model performance: {fp_model_perf} FPS")
+
+        # Benchmark performance of quantized model
+        quantized_model_perf = benchmark_performance(quantized_model_path, args)
+        print(f"Quantized model performance: {quantized_model_perf} FPS")
+    if latency:
+        fp_model_latency = measure_time_ov(ov_model, batch["img"])
+        print(f"FP32 OV model latency: {fp_model_latency} ms")
+        int8_model_latency = measure_time_ov(quantized_model, batch["img"])
+        print(f"INT8 OV model latency: {int8_model_latency} ms")
 
     return fp_stats["metrics/mAP50-95(B)"], q_stats["metrics/mAP50-95(B)"], fp_model_perf, quantized_model_perf
 
 
-def check_export_not_strict():
+def main_export_not_strict():
     model = YOLO(f"{ROOT}/{MODEL_NAME}.pt")
 
     # Prepare validation dataset and helper
-    validator, data_loader = prepare_validation_new(model, "coco128.yaml")
+    validator, data_loader = prepare_validation(model, "coco128.yaml")
 
     batch = next(iter(data_loader))
     batch = validator.preprocess(batch)
 
     model.model(batch["img"])
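+    # Export with strict=False, then capture the pre-autograd ATen graph for quantization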
     ex_model = torch.export.export(model.model, args=(batch["img"],), strict=False)
     ex_model = capture_pre_autograd_graph(ex_model.module(), args=(batch["img"],))
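+    # Compile the captured graph before validation (default torch.compile backend)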
+    ex_model = torch.compile(ex_model)
 
     fp_stats, total_images, total_objects = validate_fx(ex_model, tqdm(data_loader), validator)
     print("Floating-point ex strict=False")
     print_statistics(fp_stats, total_images, total_objects)
 
+    quantized_model = quantize_impl(deepcopy(ex_model), data_loader, validator)
+    int8_stats, total_images, total_objects = validate_fx(quantized_model, tqdm(data_loader), validator)
+    print("Int8 ex strict=False")
+    print_statistics(int8_stats, total_images, total_objects)
+    # No quantize nodes were inserted, so the metrics are OK
+
 
 if __name__ == "__main__":
-    check_export_not_strict()
-    # main()
+    # main_export_not_strict()
+    main()