Commit 9aecccf

Merge pull request #421 from yinhaofeng/inference_python
add threads,mkldnn,trt
2 parents 28713b4 + 648f71d

2 files changed: +28 −8 lines changed

doc/inference.md

Lines changed: 3 additions & 0 deletions

````diff
@@ -45,6 +45,9 @@ pip install GPUtil
 | --reader_file | string | any path || path to the python file containing the Reader() used for testing |
 | --batchsize | int | >= 1 || number of samples per batch |
 | --model_name | str | any name || name of the output model |
+| --cpu_threads | int | >= 1 || number of threads when running on CPU; has no effect when running on GPU |
+| --enable_mkldnn | bool | True/False || whether to enable mkldnn acceleration when running on CPU; has no effect when running on GPU |
+| --enable_tensorRT | bool | True/False || whether to enable tensorRT acceleration when running on GPU; has no effect when running on CPU |

 2. Using the wide_deep model's demo data as an example, start inference:
 ```bash
````
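
As a side note on the table above, the three new options are device-gated: the two CPU options are documented as no-ops on GPU, and the TensorRT option as a no-op on CPU. A tiny sketch (not code from this commit; the function name is illustrative):

```python
# Sketch only: which of the new flags take effect for a given device,
# per the documentation table above.
def effective_flags(use_gpu, cpu_threads, enable_mkldnn, enable_tensorRT):
    if use_gpu:
        # --cpu_threads and --enable_mkldnn are ignored on GPU
        return {"enable_tensorRT": enable_tensorRT}
    # --enable_tensorRT is ignored on CPU
    return {"cpu_threads": cpu_threads, "enable_mkldnn": enable_mkldnn}


print(effective_flags(False, 4, True, True))
# -> {'cpu_threads': 4, 'enable_mkldnn': True}
```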

tools/paddle_infer.py

Lines changed: 25 additions & 8 deletions

```diff
@@ -43,8 +43,15 @@ def parse_args():
     parser.add_argument("--reader_file", type=str)
     parser.add_argument("--batchsize", type=int)
     parser.add_argument("--model_name", type=str, default="not specified")
+    parser.add_argument("--cpu_threads", type=int, default=1)
+    parser.add_argument("--enable_mkldnn", type=str, default="False")
+    parser.add_argument("--enable_tensorRT", type=str, default="False")
     args = parser.parse_args()
     args.use_gpu = (True if args.use_gpu.lower() == "true" else False)
+    args.enable_mkldnn = (True
+                          if args.enable_mkldnn.lower() == "true" else False)
+    args.enable_tensorRT = (True if args.enable_tensorRT.lower() == "true" else
+                            False)
     return args
```
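
The commit parses the two new boolean flags as strings and normalizes them by hand. For reference, the same normalization is often packaged as an argparse type; a minimal sketch (not the commit's code, names are illustrative):

```python
import argparse


def str2bool(v):
    # Same effect as the commit's `(True if v.lower() == "true" else False)`
    return str(v).lower() == "true"


parser = argparse.ArgumentParser()
parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
parser.add_argument("--enable_tensorRT", type=str2bool, default=False)

args = parser.parse_args(["--enable_mkldnn", "True"])
print(args.enable_mkldnn, args.enable_tensorRT)  # True False
```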
```diff
@@ -56,10 +63,17 @@ def init_predictor(args):
     if args.use_gpu:
         config.enable_use_gpu(1000, 0)
+        if args.enable_tensorRT:
+            config.enable_tensorrt_engine(
+                max_batch_size=args.batchsize,
+                min_subgraph_size=1,
+                precision_mode=paddle.inference.PrecisionType.Float32)
     else:
         config.disable_gpu()
-        print(config)
-        # config.delete('repeated_fc_relu_fuse_pass')
+        # config.delete_pass("repeated_fc_relu_fuse_pass")
+        config.set_cpu_math_library_num_threads(args.cpu_threads)
+        if args.enable_mkldnn:
+            config.enable_mkldnn()
     predictor = create_predictor(config)
     return predictor
```
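
Taken together, this hunk routes the new flags into the inference Config: TensorRT only on the GPU path, thread count and mkldnn only on the CPU path. A minimal self-contained sketch of that control flow, assuming paddle.inference's Python API; `model_file`/`params_file` and the `args` object are placeholders rather than the script's actual fields:

```python
from paddle.inference import Config, PrecisionType, create_predictor


def build_config(args, model_file, params_file):
    # model_file/params_file are placeholder paths; the real script
    # derives them from its own arguments.
    config = Config(model_file, params_file)
    if args.use_gpu:
        config.enable_use_gpu(1000, 0)  # 1000 MB initial GPU pool, device 0
        if args.enable_tensorRT:
            config.enable_tensorrt_engine(
                max_batch_size=args.batchsize,
                min_subgraph_size=1,
                precision_mode=PrecisionType.Float32)
    else:
        config.disable_gpu()
        config.set_cpu_math_library_num_threads(args.cpu_threads)
        if args.enable_mkldnn:
            config.enable_mkldnn()
    return config

# Hypothetical usage (requires a saved model on disk):
# predictor = create_predictor(
#     build_config(args, "model.pdmodel", "model.pdiparams"))
```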

```diff
@@ -91,16 +105,19 @@ def log_print(args, results_type, num_test_data, average_preprocess_time,
     print("----------------------- Conf info -----------------------")
     print("runtime_device: {}".format("gpu" if args.use_gpu else "cpu"))
     print("ir_optim: {}\nenable_memory_optim: {}\nenable_tensorrt: {}".format(
-        "False", "False", "False"))
+        "False", "False", args.enable_tensorRT))
     print("precision: {}".format([str(x).split(".")[1] for x in results_type]))
-    print("enable_mkldnn: {}\ncpu_math_library_num_threads: {}".format("False",
-                                                                       1))
+    print("enable_mkldnn: {}\ncpu_math_library_num_threads: {}".format(
+        args.enable_mkldnn, args.cpu_threads))
     print("----------------------- Perf info -----------------------")
     print(
         "preprocess_time(ms): {}\ninference_time(ms): {}\npostprocess_time(ms): {}".
         format(average_preprocess_time * 1000, average_inference_time * 1000,
                average_postprocess_time * 1000))
     print("The number of predicted data: {}".format(num_test_data))
+    print("total time spend(s): {:.5f}".format(
+        (average_preprocess_time + average_inference_time +
+         average_postprocess_time) * num_test_data))
     print("cpu_rss(MB): {}, gpu_rss(MB): {}".format(cpu_rss, gpu_rss))
     print("gpu_util: {}%".format(str(gpu_util * 100)[:4]))
```
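
The new `total time spend(s)` line scales the three per-sample averages back up by the dataset size. A quick sanity check with hypothetical numbers:

```python
# Hypothetical per-sample averages, in seconds
average_preprocess_time = 0.002
average_inference_time = 0.010
average_postprocess_time = 0.001
num_test_data = 1000

total_s = (average_preprocess_time + average_inference_time +
           average_postprocess_time) * num_test_data
print("total time spend(s): {:.5f}".format(total_s))  # 13.00000
```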

```diff
@@ -190,9 +207,9 @@ def main(args):
     average_preprocess_time = preprocess_time.value() / num_test_data
     average_inference_time = inference_time.value() / num_test_data
     average_postprocess_time = postprocess_time.value() / num_test_data
-    cpu_rss = cpu_mem / num_test_data
-    gpu_rss = gpu_mem / num_test_data
-    gpu_util = gpu_util / num_test_data
+    cpu_rss = cpu_mem / (batch_id + 1)
+    gpu_rss = gpu_mem / (batch_id + 1)
+    gpu_util = gpu_util / (batch_id + 1)
     log_print(args, results_type, num_test_data, average_preprocess_time,
               average_inference_time, average_postprocess_time, cpu_rss,
               gpu_rss, gpu_util)
```
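
The divisor change fixes an averaging bug: `cpu_mem`, `gpu_mem`, and `gpu_util` accumulate one reading per batch, while the timers accumulate per sample, so dividing the memory sums by `num_test_data` understated them by roughly the batch size. A toy illustration with made-up numbers:

```python
num_test_data, batch_size = 100, 10
num_batches = num_test_data // batch_size  # == batch_id + 1 after the loop
cpu_mem = sum(500.0 for _ in range(num_batches))  # one ~500 MB RSS reading per batch

print(cpu_mem / num_test_data)  # 50.0  -- old divisor, off by the batch size
print(cpu_mem / num_batches)    # 500.0 -- new divisor, the true average RSS
```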
