 import time
 import logging
 import sys
+import re
 from importlib import import_module
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 from utils.utils_single import load_yaml, load_dy_model_class, get_abs_model
 from utils.save_load import save_model, load_model
-from utils.benchmark_utils import PaddleInferBenchmark
 from paddle.io import DistributedBatchSampler, DataLoader
 import argparse
 from paddle.inference import Config
 from paddle.inference import create_predictor
-import pynvml
-import psutil
-import GPUtil


 def parse_args():
@@ -47,18 +44,35 @@ def parse_args():
     parser.add_argument("--cpu_threads", type=int, default=1)
     parser.add_argument("--enable_mkldnn", type=str, default="False")
     parser.add_argument("--enable_tensorRT", type=str, default="False")
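+    # Benchmark flags: --benchmark toggles auto_log collection, --save_log_path
+    # sets where the report is written, and --precision is recorded as log
+    # metadata only (the TensorRT engine below stays fixed to Float32).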
+    parser.add_argument("--benchmark", type=str, default="True")
+    parser.add_argument("--save_log_path", type=str, default="./output")
+    parser.add_argument("--precision", type=str)
     args = parser.parse_args()
     args.use_gpu = (True if args.use_gpu.lower() == "true" else False)
     args.enable_mkldnn = (True
                           if args.enable_mkldnn.lower() == "true" else False)
     args.enable_tensorRT = (True if args.enable_tensorRT.lower() == "true" else
                             False)
+    args.benchmark = (True if args.benchmark.lower() == "true" else False)
     return args


 def init_predictor(args):
     if args.model_dir:
-        config = Config(args.model_dir)
+        has_model = 0
+        pdmodel_name = 0
+        pdiparams_name = 0
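+        # Probe the export directory: a legacy (non-combined) export contains a
+        # "__model__" file and is loaded by directory, while a combined export
+        # pairs a .pdmodel graph file with a .pdiparams weights file.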
+        for file_name in os.listdir(args.model_dir):
+            if re.search("__model__", file_name):
+                has_model = 1
+            if file_name.endswith(".pdmodel"):
+                pdmodel_name = os.path.join(args.model_dir, file_name)
+            if file_name.endswith(".pdiparams"):
+                pdiparams_name = os.path.join(args.model_dir, file_name)
+        if has_model == 1:
+            config = Config(args.model_dir)
+        elif pdmodel_name and pdiparams_name:
+            config = Config(pdmodel_name, pdiparams_name)
     else:
         config = Config(args.model_file, args.params_file)

@@ -67,7 +81,7 @@ def init_predictor(args):
         if args.enable_tensorRT:
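+            # Assumption: raising min_subgraph_size from 1 to 9 avoids building
+            # TensorRT engines for very small subgraphs, where engine-build
+            # overhead outweighs the speedup; those ops stay on the GPU path.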
             config.enable_tensorrt_engine(
                 max_batch_size=args.batchsize,
-                min_subgraph_size=1,
+                min_subgraph_size=9,
                 precision_mode=paddle.inference.PrecisionType.Float32)
     else:
         config.disable_gpu()
@@ -89,122 +103,64 @@ def create_data_loader(args):
     sys.path.append(reader_path)
     #sys.path.append(os.path.abspath("."))
     reader_class = import_module(reader_file)
-    config = {"inference": True}
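+    # RecDataset readers look options up under the flattened "runner." config
+    # namespace, so the inference switch is now keyed "runner.inference".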
+    config = {"runner.inference": True}
     dataset = reader_class.RecDataset(file_list, config=config)
     loader = DataLoader(
         dataset, batch_size=batchsize, places=place, drop_last=True)
     return loader

-class Times(object):
-    def __init__(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-
-    def start(self):
-        self.st = time.time()
-
-    def end(self, accumulative=True):
-        self.et = time.time()
-        if accumulative:
-            self.time += self.et - self.st
-        else:
-            self.time = self.et - self.st
-
-    def reset(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-
-    def value(self):
-        return round(self.time, 4)
-
-
-def get_current_memory_mb(gpu_id=None):
-    pid = os.getpid()
-    p = psutil.Process(pid)
-    info = p.memory_full_info()
-    cpu_mem = info.uss / 1024. / 1024.
-    gpu_mem = 0
-    gpu_precent = 0
-    if gpu_id is not None:
-        GPUs = GPUtil.getGPUs()
-        gpu_load = GPUs[gpu_id].load
-        gpu_precent = gpu_load
-        pynvml.nvmlInit()
-        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpu_mem = meminfo.used / 1024. / 1024.
-    return cpu_mem, gpu_mem, gpu_precent
-
-
 def main(args):
     predictor, pred_config = init_predictor(args)
     place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
     args.place = place
     input_names = predictor.get_input_names()
     output_names = predictor.get_output_names()
     test_dataloader = create_data_loader(args)
-    preprocess_time = Times()
-    inference_time = Times()
-    postprocess_time = Times()
-    cpu_mem, gpu_mem = 0, 0
-    gpu_id = 0
-    gpu_util = 0
+
+    if args.benchmark:
+        import auto_log
+        pid = os.getpid()
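+        # auto_log's AutoLogger collects the timing stamps recorded below plus
+        # host/GPU resource usage for the given pids/gpu_ids, and reads the
+        # predictor settings from inference_config.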
+        autolog = auto_log.AutoLogger(
+            model_name=args.model_name,
+            model_precision=args.precision,
+            batch_size=args.batchsize,
+            data_shape="dynamic",
+            save_path=args.save_log_path,
+            inference_config=pred_config,
+            pids=pid,
+            process_name=None,
+            gpu_ids=0,
+            time_keys=[
+                'preprocess_time', 'inference_time', 'postprocess_time'
+            ])
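+        # The three time_keys map onto the start()/stamp()/stamp()/end() calls
+        # in the loop below: start->stamp is preprocess, stamp->stamp is
+        # inference, and stamp->end is postprocess.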
+
     for batch_id, batch_data in enumerate(test_dataloader):
         name_data_pair = dict(zip(input_names, batch_data))
-        preprocess_time.start()
+        if args.benchmark:
+            autolog.times.start()
         for name in input_names:
             input_tensor = predictor.get_input_handle(name)
             input_tensor.copy_from_cpu(name_data_pair[name].numpy())
-        preprocess_time.end(accumulative=True)
-        inference_time.start()
+        if args.benchmark:
+            autolog.times.stamp()
         predictor.run()
         for name in output_names:
             output_tensor = predictor.get_output_handle(name)
             output_data = output_tensor.copy_to_cpu()
-        inference_time.end(accumulative=True)
         results = []
         results_type = []
-        postprocess_time.start()
+        if args.benchmark:
+            autolog.times.stamp()
         for name in output_names:
             results_type.append(output_tensor.type())
             results.append(output_data[0])
-        postprocess_time.end(accumulative=True)
-        cm, gm, gu = get_current_memory_mb(gpu_id)
-        cpu_mem += cm
-        gpu_mem += gm
-        gpu_util += gu
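+        # end(stamp=True) closes the final (postprocess) interval for this batch.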
+        if args.benchmark:
+            autolog.times.end(stamp=True)
         print(results)

-    num_test_data = args.batchsize * (batch_id + 1)
-    average_preprocess_time = preprocess_time.value() / (batch_id + 1)
-    average_inference_time = inference_time.value() / (batch_id + 1)
-    average_postprocess_time = postprocess_time.value() / (batch_id + 1)
-    cpu_rss = cpu_mem / (batch_id + 1)
-    gpu_rss = gpu_mem / (batch_id + 1)
-    gpu_util = gpu_util / (batch_id + 1)
-
-    perf_info = {
-        'inference_time_s': average_inference_time,
-        'preprocess_time_s': average_preprocess_time,
-        'postprocess_time_s': average_postprocess_time
-    }
-    model_info = {'model_name': args.model_name, 'precision': "fp32"}
-    data_info = {
-        'batch_size': args.batchsize,
-        'shape': "dynamic_shape",
-        'data_num': num_test_data
-    }
-    resource_info = {
-        'cpu_rss_mb': cpu_rss,
-        'gpu_rss_mb': gpu_rss,
-        'gpu_util': gpu_util
-    }
-    rec_log = PaddleInferBenchmark(pred_config, model_info, data_info,
-                                   perf_info, resource_info)
-    rec_log('Rec')
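+    # report() aggregates the recorded intervals and resource stats and writes
+    # the benchmark summary under save_log_path, replacing the manual
+    # PaddleInferBenchmark bookkeeping removed above.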
+    if args.benchmark:
+        autolog.report()


 if __name__ == '__main__':