@@ -43,8 +43,15 @@ def parse_args():
4343 parser .add_argument ("--reader_file" , type = str )
4444 parser .add_argument ("--batchsize" , type = int )
4545 parser .add_argument ("--model_name" , type = str , default = "not specified" )
46+ parser .add_argument ("--cpu_threads" , type = int , default = 1 )
47+ parser .add_argument ("--enable_mkldnn" , type = str , default = "False" )
48+ parser .add_argument ("--enable_tensorRT" , type = str , default = "False" )
4649 args = parser .parse_args ()
4750 args .use_gpu = (True if args .use_gpu .lower () == "true" else False )
51+ args .enable_mkldnn = (True
52+ if args .enable_mkldnn .lower () == "true" else False )
53+ args .enable_tensorRT = (True if args .enable_tensorRT .lower () == "true" else
54+ False )
4855 return args
4956
5057
@@ -56,10 +63,17 @@ def init_predictor(args):
5663
5764 if args .use_gpu :
5865 config .enable_use_gpu (1000 , 0 )
66+ if args .enable_tensorRT :
67+ config .enable_tensorrt_engine (
68+ max_batch_size = args .batchsize ,
69+ min_subgraph_size = 1 ,
70+ precision_mode = paddle .inference .PrecisionType .Float32 )
5971 else :
6072 config .disable_gpu ()
61- print (config )
62- # config.delete('repeated_fc_relu_fuse_pass')
73+ # config.delete_pass("repeated_fc_relu_fuse_pass")
74+ config .set_cpu_math_library_num_threads (args .cpu_threads )
75+ if args .enable_mkldnn :
76+ config .enable_mkldnn ()
6377 predictor = create_predictor (config )
6478 return predictor
6579
@@ -91,16 +105,19 @@ def log_print(args, results_type, num_test_data, average_preprocess_time,
91105 print ("----------------------- Conf info -----------------------" )
92106 print ("runtime_device: {}" .format ("gpu" if args .use_gpu else "cpu" ))
93107 print ("ir_optim: {}\n enable_memory_optim: {}\n enable_tensorrt: {}" .format (
94- "False" , "False" , "False" ))
108+ "False" , "False" , args . enable_tensorRT ))
95109 print ("precision: {}" .format ([str (x ).split ("." )[1 ] for x in results_type ]))
96- print ("enable_mkldnn: {}\n cpu_math_library_num_threads: {}" .format ("False" ,
97- 1 ))
110+ print ("enable_mkldnn: {}\n cpu_math_library_num_threads: {}" .format (
111+ args . enable_mkldnn , args . cpu_threads ))
98112 print ("----------------------- Perf info -----------------------" )
99113 print (
100114 "preprocess_time(ms): {}\n inference_time(ms): {}\n postprocess_time(ms): {}" .
101115 format (average_preprocess_time * 1000 , average_inference_time * 1000 ,
102116 average_postprocess_time * 1000 ))
103117 print ("The number of predicted data: {}" .format (num_test_data ))
118+ print ("total time spend(s): {:.5f}" .format (
119+ (average_preprocess_time + average_inference_time +
120+ average_postprocess_time ) * num_test_data ))
104121 print ("cpu_rss(MB): {}, gpu_rss(MB): {}" .format (cpu_rss , gpu_rss ))
105122 print ("gpu_util: {}%" .format (str (gpu_util * 100 )[:4 ]))
106123
@@ -190,9 +207,9 @@ def main(args):
190207 average_preprocess_time = preprocess_time .value () / num_test_data
191208 average_inference_time = inference_time .value () / num_test_data
192209 average_postprocess_time = postprocess_time .value () / num_test_data
193- cpu_rss = cpu_mem / num_test_data
194- gpu_rss = gpu_mem / num_test_data
195- gpu_util = gpu_util / num_test_data
210+ cpu_rss = cpu_mem / ( batch_id + 1 )
211+ gpu_rss = gpu_mem / ( batch_id + 1 )
212+ gpu_util = gpu_util / ( batch_id + 1 )
196213 log_print (args , results_type , num_test_data , average_preprocess_time ,
197214 average_inference_time , average_postprocess_time , cpu_rss ,
198215 gpu_rss , gpu_util )
0 commit comments