2828import argparse
2929from paddle .inference import Config
3030from paddle .inference import create_predictor
31+ import pynvml
32+ import psutil
33+ import GPUtil
3134
3235
3336def parse_args ():
@@ -39,6 +42,7 @@ def parse_args():
3942 parser .add_argument ("--data_dir" , type = str )
4043 parser .add_argument ("--reader_file" , type = str )
4144 parser .add_argument ("--batchsize" , type = int )
45+ parser .add_argument ("--model_name" , type = str , default = "not specified" )
4246 args = parser .parse_args ()
4347 return args
4448
@@ -71,26 +75,123 @@ def create_data_loader(args):
7175 return loader
7276
7377
def log_print(args, results_type, num_test_data, average_preprocess_time,
              average_inference_time, average_postprocess_time, cpu_rss,
              gpu_rss, gpu_util):
    """Print a formatted benchmark report to stdout.

    Args:
        args: parsed CLI namespace; reads ``model_name``, ``batchsize`` and
            ``use_gpu``.
        results_type: list of output-tensor type objects; each is rendered as
            the text after the first '.' of its ``str()`` form.
        num_test_data: total number of predicted samples.
        average_preprocess_time: per-sample preprocess time in seconds.
        average_inference_time: per-sample inference time in seconds.
        average_postprocess_time: per-sample postprocess time in seconds.
        cpu_rss: CPU memory figure (MB) to report.
        gpu_rss: GPU memory figure (MB) to report.
        gpu_util: GPU utilization as a fraction in [0, 1].
    """
    print("----------------------- Model info ----------------------")
    # Fixed typo in the report label: "model_sorce" -> "model_source".
    print("model_name: {}\n type: {}\n model_source: {}".format(
        args.model_name, "static", "PaddleRec"))
    print("----------------------- Data info -----------------------")
    print("batch_size: {}".format(args.batchsize))
    print("----------------------- Conf info -----------------------")
    print("runtime_device: {}".format("gpu" if args.use_gpu else "cpu"))
    print("ir_optim: {}\n enable_memory_optim: {}\n enable_tensorrt: {}".format(
        "False", "False", "False"))
    # str(x) of a tensor dtype looks like "Enum.NAME"; keep the part after '.'.
    print("precision: {}".format([str(x).split(".")[1] for x in results_type]))
    print("enable_mkldnn: {}\n cpu_math_library_num_threads: {}".format("False",
                                                                        1))
    print("----------------------- Perf info -----------------------")
    # Times arrive in seconds; report milliseconds.
    print(
        "average preprocess_time(ms): {}\n average inference_time(ms): {}\n average postprocess_time(ms): {}".
        format(average_preprocess_time * 1000, average_inference_time * 1000,
               average_postprocess_time * 1000))
    print("The number of predicted data: {}".format(num_test_data))
    print("cpu_rss(MB): {}, gpu_rss(MB): {}".format(cpu_rss, gpu_rss))
    # Truncate (not round) the percentage to at most 4 characters, e.g. "50.0".
    print("gpu_util: {}%".format(str(gpu_util * 100)[:4]))
101+
102+
class Times(object):
    """Accumulating stopwatch for timing repeated pipeline stages.

    Call :meth:`start` then :meth:`end` around each measured section; the
    elapsed span is added to (or replaces) the running total, which
    :meth:`value` reports rounded to 4 decimal places.
    """

    def __init__(self):
        # Accumulated elapsed seconds, plus the last start/end timestamps.
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def start(self):
        """Record the start timestamp of a measured section."""
        # perf_counter() is monotonic, unlike time.time(), so clock
        # adjustments (NTP, DST) cannot produce negative/garbled intervals.
        self.st = time.perf_counter()

    def end(self, accumulative=True):
        """Record the end timestamp and update the total.

        Args:
            accumulative: when True (default) the elapsed span is added to
                the running total; when False it replaces it.
        """
        self.et = time.perf_counter()
        if accumulative:
            self.time += self.et - self.st
        else:
            self.time = self.et - self.st

    def reset(self):
        """Zero the accumulated total and both timestamps."""
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def value(self):
        """Return the accumulated seconds, rounded to 4 decimal places."""
        return round(self.time, 4)
126+
127+
def get_current_memory_mb(gpu_id=None):
    """Sample current memory usage of this process, plus GPU stats.

    Args:
        gpu_id: index of the GPU to query, or None to skip GPU sampling.

    Returns:
        Tuple ``(cpu_mem_mb, gpu_mem_mb, gpu_util_fraction)``. GPU values are
        0 when ``gpu_id`` is None. ``gpu_mem_mb`` is the device-wide used
        memory reported by NVML, not per-process usage.
    """
    process = psutil.Process(os.getpid())
    # USS (unique set size) counts only memory private to this process.
    cpu_mem = process.memory_full_info().uss / 1024. / 1024.
    gpu_mem = 0
    gpu_percent = 0
    if gpu_id is not None:
        gpu_percent = GPUtil.getGPUs()[gpu_id].load
        pynvml.nvmlInit()
        # BUG FIX: query the requested device; the original hard-coded
        # index 0 here while reading utilization from gpu_id above.
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
    return cpu_mem, gpu_mem, gpu_percent
144+
145+
def main(args):
    """Run inference over the whole dataloader, printing each batch's outputs
    and a final benchmark report (stage timings, memory, GPU utilization)."""
    predictor = init_predictor(args)
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
    args.place = place
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    test_dataloader = create_data_loader(args)
    # One accumulating stopwatch per pipeline stage, summed across batches.
    preprocess_time = Times()
    inference_time = Times()
    postprocess_time = Times()
    # Running sums of per-batch memory/utilization samples.
    cpu_mem, gpu_mem = 0, 0
    gpu_id = 0  # NOTE(review): GPU index is hard-coded even when use_gpu is False — confirm intent.
    gpu_util = 0
    for batch_id, batch_data in enumerate(test_dataloader):
        # Pair each input tensor name with the corresponding batch tensor.
        name_data_pair = dict(zip(input_names, batch_data))
        preprocess_time.start()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(name_data_pair[name].numpy())
        preprocess_time.end(accumulative=True)
        inference_time.start()
        predictor.run()
        inference_time.end(accumulative=True)
        results = []
        results_type = []
        postprocess_time.start()
        for name in output_names:
            output_tensor = predictor.get_output_handle(name)
            # Collect the output dtype for the "precision" line of the report.
            results_type.append(output_tensor.type())
            output_data = output_tensor.copy_to_cpu()
            # Keep only the first row of each output batch for display.
            results.append(output_data[0])
        postprocess_time.end(accumulative=True)
        # Sample process/GPU memory once per batch.
        cm, gm, gu = get_current_memory_mb(gpu_id)
        cpu_mem += cm
        gpu_mem += gm
        gpu_util += gu
        print(results)

    # NOTE(review): batch_id is unbound here if the dataloader yields nothing —
    # an empty dataset raises NameError. Assumes the last batch is full-sized.
    num_test_data = args.batchsize * (batch_id + 1)
    average_preprocess_time = preprocess_time.value() / num_test_data
    average_inference_time = inference_time.value() / num_test_data
    average_postprocess_time = postprocess_time.value() / num_test_data
    # NOTE(review): memory/utilization are sampled once per BATCH but divided
    # by the SAMPLE count, so these are not per-batch averages — confirm the
    # intended semantics of cpu_rss/gpu_rss/gpu_util in the report.
    cpu_rss = cpu_mem / num_test_data
    gpu_rss = gpu_mem / num_test_data
    gpu_util = gpu_util / num_test_data
    log_print(args, results_type, num_test_data, average_preprocess_time,
              average_inference_time, average_postprocess_time, cpu_rss,
              gpu_rss, gpu_util)
194+
94195
95196if __name__ == '__main__' :
96197 args = parse_args ()
0 commit comments