11import os
22import onnx
3- import glob
43import scipy .io
54import numpy as np
65import logging
76from PIL import Image
87import onnx
98import onnxruntime
10- from onnxruntime . quantization import CalibrationDataReader , create_calibrator , write_calibration_table
9+ import time
1110
11+ from onnxruntime .quantization import CalibrationDataReader , create_calibrator , write_calibration_table
1212
1313class ImageNetDataReader (CalibrationDataReader ):
1414 def __init__ (self ,
@@ -126,10 +126,10 @@ def preprocess_imagenet(self, images_folder, height, width, start_index=0, size_
126126 return: list of matrices characterizing multiple images
127127 '''
128128 def preprocess_images (input , channels = 3 , height = 224 , width = 224 ):
129- image = input .resize ((width , height ), Image .ANTIALIAS )
129+ image = input .resize ((width , height ), Image .Resampling . LANCZOS )
130130 input_data = np .asarray (image ).astype (np .float32 )
131131 if len (input_data .shape ) != 2 :
132- input_data = input_data .transpose ([2 , 0 , 1 ])
132+ input_data = input_data .transpose ([2 , 0 , 1 ])[: 3 ]
133133 else :
134134 input_data = np .stack ([input_data ] * 3 )
135135 mean = np .array ([0.079 , 0.05 , 0 ]) + 0.406
@@ -217,6 +217,7 @@ def __init__(self,
217217 self .data_reader = data_reader
218218 self .providers = providers
219219 self .prediction_result_list = []
220+ self .inference_latency_list = []
220221 self .synset_id = synset_id
221222
222223 def get_result (self ):
@@ -233,7 +234,12 @@ def predict(self):
233234 inputs = self .data_reader .get_next ()
234235 if not inputs :
235236 break
237+
238+ start_ns = time .perf_counter_ns ()
236239 output = session .run (None , inputs )
240+ end_ns = time .perf_counter_ns ()
241+ self .inference_latency_list .append (end_ns - start_ns )
242+
237243 inference_outputs_list .append (output )
238244 self .prediction_result_list = inference_outputs_list
239245
@@ -254,6 +260,9 @@ def evaluate(self, prediction_results):
254260 i = i + batch_size
255261 print ("top 1: " , self .top_k_accuracy (self .synset_id , y_prediction , k = 1 ))
256262 print ("top 5: " , self .top_k_accuracy (self .synset_id , y_prediction , k = 5 ))
263+ if self .inference_latency_list :
264+ print ("average latency:" , sum (self .inference_latency_list ) / len (self .inference_latency_list ) / 1e6 , " ms" )
265+
257266
258267
259268def convert_model_batch_to_dynamic (model_path ):
@@ -303,7 +312,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
303312 4. Extract development kit to 'ILSVRC2012/devkit'. Two files in the development kit are used, 'ILSVRC2012_validation_ground_truth.txt' and 'meta.mat'.
304313 5. Download 'synset_words.txt' from https://github.com/HoldenCaulfieldRye/caffe/blob/master/data/ilsvrc12/synset_words.txt into 'ILSVRC2012/'.
305314
306- Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/blob/master/vision/classification/resnet/model/resnet50-v2-7.tar.gz
315+ Please download Resnet50 model from ONNX model zoo https://github.com/onnx/models/raw/refs/heads/main/validated/vision/classification/resnet/model/resnet50-v2-7.onnx
307316 Untar the model into the workspace
308317 '''
309318
@@ -317,13 +326,6 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
317326 # INT8 calibration setting
318327 calibration_table_generation_enable = True # Enable/Disable INT8 calibration
319328
320- # TensorRT EP INT8 settings
321- os .environ ["ORT_TENSORRT_FP16_ENABLE" ] = "1" # Enable FP16 precision
322- os .environ ["ORT_TENSORRT_INT8_ENABLE" ] = "1" # Enable INT8 precision
323- os .environ ["ORT_TENSORRT_INT8_CALIBRATION_TABLE_NAME" ] = "calibration.flatbuffers" # Calibration table name
324- os .environ ["ORT_TENSORRT_ENGINE_CACHE_ENABLE" ] = "1" # Enable engine caching
325- execution_provider = ["TensorrtExecutionProvider" ]
326-
327329 # Convert static batch to dynamic batch
328330 [new_model_path , input_name ] = convert_model_batch_to_dynamic (model_path )
329331
@@ -343,7 +345,7 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
343345 model_path = augmented_model_path ,
344346 input_name = input_name )
345347 calibrator .collect_data (data_reader )
346- write_calibration_table (calibrator .compute_range ())
348+ write_calibration_table (calibrator .compute_data ())
347349
348350 # Run prediction in Tensorrt EP
349351 data_reader = ImageNetDataReader (ilsvrc2012_dataset_path ,
@@ -355,7 +357,30 @@ def get_dataset_size(dataset_path, calibration_dataset_size):
355357 input_name = input_name )
356358 synset_id = data_reader .get_synset_id (ilsvrc2012_dataset_path , calibration_dataset_size ,
357359 prediction_dataset_size ) # Generate synset id
358- evaluator = ImageClassificationEvaluator (new_model_path , synset_id , data_reader , providers = execution_provider )
360+
361+ # providers = ["CUDAExecutionProvider"]
362+ # -----H100-----
363+ # top 1: 0.7419183673469387
364+ # top 5: 0.9174897959183673
365+ # average latency: 5.676715467755102 ms
366+
367+ # providers = [('TensorrtExecutionProvider', {"trt_fp16_enable": True})]
368+ # -----H100-----
369+ # top 1: 0.7421020408163266
370+ # top 5: 0.917530612244898
371+ # average latency: 27.816876598367347 ms
372+
373+ providers = [('TensorrtExecutionProvider' , {
374+ "trt_fp16_enable" : True ,
375+ "trt_int8_enable" : True ,
376+ "trt_int8_calibration_table_name" : "calibration.flatbuffers" ,
377+ "trt_engine_cache_enable" : True })]
378+ # -----H100-----
379+ # top 1: 0.7101020408163266
380+ # top 5: 0.898061224489796
381+ # average latency: 2.2716067718367348 ms
382+
383+ evaluator = ImageClassificationEvaluator (new_model_path , synset_id , data_reader , providers = providers )
359384 evaluator .predict ()
360385 result = evaluator .get_result ()
361386 evaluator .evaluate (result )
0 commit comments