Skip to content

Commit 161ef56

Browse files
committed
add log
1 parent 0f269c3 commit 161ef56

File tree

3 files changed

+105
-3
lines changed

3 files changed

+105
-3
lines changed

doc/inference.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ paddlerec提供tools/paddle_infer.py脚本,供您方便的使用inference预
3737
| --data_dir | string | 任意路径 || 测试数据目录 |
3838
| --reader_file | string | 任意路径 || 测试时用的Reader()所在python文件地址 |
3939
| --batchsize | int | >= 1 || 批训练样本数量 |
40+
| --model_name | string | 任意名字 || 输出模型名字 |
4041

4142
2. 以wide_deep模型的demo数据为例,启动预测:
4243
```bash

models/rank/wide_deep/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ wide&deep设计了一种融合浅层(wide)模型和深层(deep)模型进
8888

8989
| 模型 | auc | batch_size | thread_num| epoch_num| Time of each epoch |
9090
| :------| :------ | :------| :------ | :------| :------ |
91-
| wide_deep | 0.82 | 512 | 1 | 4 | 约2小时 |
91+
| wide_deep | 0.79 | 512 | 1 | 4 | 约2小时 |
9292

9393
1. 确认您当前所在目录为PaddleRec/models/rank/wide_deep
9494
2. 进入paddlerec/datasets/criteo目录下,执行该脚本,会从国内源的服务器上下载我们预处理完成的criteo全量数据集,并解压到指定文件夹。

tools/paddle_infer.py

Lines changed: 103 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@
2828
import argparse
2929
from paddle.inference import Config
3030
from paddle.inference import create_predictor
31+
import pynvml
32+
import psutil
33+
import GPUtil
3134

3235

3336
def parse_args():
@@ -39,6 +42,7 @@ def parse_args():
3942
parser.add_argument("--data_dir", type=str)
4043
parser.add_argument("--reader_file", type=str)
4144
parser.add_argument("--batchsize", type=int)
45+
parser.add_argument("--model_name", type=str, default="not specified")
4246
args = parser.parse_args()
4347
return args
4448

@@ -71,26 +75,123 @@ def create_data_loader(args):
7175
return loader
7276

7377

78+
def log_print(args, results_type, num_test_data, average_preprocess_time,
              average_inference_time, average_postprocess_time, cpu_rss,
              gpu_rss, gpu_util):
    """Print a formatted benchmark report for one inference run.

    Args:
        args: parsed CLI namespace; reads args.model_name, args.batchsize
            and args.use_gpu.
        results_type: list of output dtypes; each element's str() is expected
            to look like "PaddleDType.FLOAT32" — the part after the first "."
            is reported (assumption based on the split below, TODO confirm).
        num_test_data: total number of predicted samples.
        average_preprocess_time / average_inference_time /
        average_postprocess_time: per-sample phase times in seconds
            (converted to ms here).
        cpu_rss / gpu_rss: averaged resident memory in MB.
        gpu_util: averaged GPU load in [0, 1]; printed as a percentage.
    """
    print("----------------------- Model info ----------------------")
    # Fixed label typo: was "model_sorce".
    print("model_name: {}\ntype: {}\nmodel_source: {}".format(
        args.model_name, "static", "PaddleRec"))
    print("----------------------- Data info -----------------------")
    print("batch_size: {}".format(args.batchsize))
    print("----------------------- Conf info -----------------------")
    print("runtime_device: {}".format("gpu" if args.use_gpu else "cpu"))
    # These optimizations are not enabled by this script, hence hardcoded.
    print("ir_optim: {}\nenable_memory_optim: {}\nenable_tensorrt: {}".format(
        "False", "False", "False"))
    print("precision: {}".format([str(x).split(".")[1] for x in results_type]))
    print("enable_mkldnn: {}\ncpu_math_library_num_threads: {}".format(
        "False", 1))
    print("----------------------- Perf info -----------------------")
    print(
        "average preprocess_time(ms): {}\naverage inference_time(ms): {}\naverage postprocess_time(ms): {}".
        format(average_preprocess_time * 1000, average_inference_time * 1000,
               average_postprocess_time * 1000))
    print("The number of predicted data: {}".format(num_test_data))
    print("cpu_rss(MB): {}, gpu_rss(MB): {}".format(cpu_rss, gpu_rss))
    # Truncate to at most 4 characters (e.g. "52.3") rather than rounding.
    print("gpu_util: {}%".format(str(gpu_util * 100)[:4]))
101+
102+
103+
class Times(object):
    """Accumulating stopwatch for timing repeated phases of a loop.

    Call start() / end() around each occurrence of a phase; value() returns
    the total elapsed seconds (rounded to 4 decimals) across all occurrences.
    """

    def __init__(self):
        # Accumulated elapsed seconds; st/et hold the last start/end stamps.
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def start(self):
        """Mark the beginning of a timed interval."""
        # perf_counter is monotonic, so intervals can never go negative the
        # way wall-clock time.time() can under clock adjustments.
        self.st = time.perf_counter()

    def end(self, accumulative=True):
        """Mark the end of a timed interval.

        Args:
            accumulative: if True (default), add this interval to the running
                total; otherwise replace the total with just this interval.
        """
        self.et = time.perf_counter()
        if accumulative:
            self.time += self.et - self.st
        else:
            self.time = self.et - self.st

    def reset(self):
        """Clear the accumulated total and both timestamps."""
        self.time = 0.
        self.st = 0.
        self.et = 0.

    def value(self):
        """Return the accumulated seconds, rounded to 4 decimal places."""
        return round(self.time, 4)
126+
127+
128+
def get_current_memory_mb(gpu_id=None):
    """Sample current process memory and GPU usage.

    Args:
        gpu_id: index of the GPU to query, or None to skip GPU sampling.

    Returns:
        (cpu_mem, gpu_mem, gpu_percent): process USS in MB, GPU memory used
        in MB, and GPU load as a fraction in [0, 1] (0 when gpu_id is None).
    """
    pid = os.getpid()
    p = psutil.Process(pid)
    info = p.memory_full_info()
    # USS (unique set size) is the memory attributable to this process alone.
    cpu_mem = info.uss / 1024. / 1024.
    gpu_mem = 0
    gpu_percent = 0
    if gpu_id is not None:
        GPUs = GPUtil.getGPUs()
        gpu_percent = GPUs[gpu_id].load
        pynvml.nvmlInit()
        # BUG FIX: query the requested device — previously hardcoded index 0,
        # which reported the wrong card's memory on multi-GPU machines.
        handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
        gpu_mem = meminfo.used / 1024. / 1024.
    return cpu_mem, gpu_mem, gpu_percent
144+
145+
74146
def main(args):
    """Run inference over the whole test dataset and print a perf report.

    Creates the predictor and data loader, feeds every batch, times the
    preprocess / inference / postprocess phases, samples CPU/GPU memory and
    GPU load after each batch, then prints per-sample averages via
    log_print().
    """
    predictor = init_predictor(args)
    place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
    args.place = place
    input_names = predictor.get_input_names()
    output_names = predictor.get_output_names()
    test_dataloader = create_data_loader(args)
    # Accumulated wall-clock time per phase, across all batches.
    preprocess_time = Times()
    inference_time = Times()
    postprocess_time = Times()
    # Running sums of per-batch memory / utilization samples.
    cpu_mem, gpu_mem = 0, 0
    # NOTE(review): gpu_id is hardcoded to 0, so GPU stats are sampled even
    # in CPU mode and always from device 0 — confirm this is intended.
    gpu_id = 0
    gpu_util = 0
    for batch_id, batch_data in enumerate(test_dataloader):
        name_data_pair = dict(zip(input_names, batch_data))
        preprocess_time.start()
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            # assumes each batch element is a tensor exposing .numpy() in the
            # same order as input_names — TODO confirm against the reader
            input_tensor.copy_from_cpu(name_data_pair[name].numpy())
        preprocess_time.end(accumulative=True)
        inference_time.start()
        predictor.run()
        inference_time.end(accumulative=True)
        results = []
        results_type = []
        postprocess_time.start()
        for name in output_names:
            output_tensor = predictor.get_output_handle(name)
            # Output dtypes collected for the precision line in log_print.
            results_type.append(output_tensor.type())
            output_data = output_tensor.copy_to_cpu()
            # Only the first row of each output is kept for printing.
            results.append(output_data[0])
        postprocess_time.end(accumulative=True)
        # One memory/utilization sample per batch; averaged below.
        cm, gm, gu = get_current_memory_mb(gpu_id)
        cpu_mem += cm
        gpu_mem += gm
        gpu_util += gu
        print(results)

    # NOTE(review): if the dataloader yields no batches, batch_id is
    # undefined here and this raises NameError — confirm upstream guarantees
    # a non-empty dataset.
    # num_test_data assumes every batch is full-sized; a partial last batch
    # would overcount the sample total.
    num_test_data = args.batchsize * (batch_id + 1)
    average_preprocess_time = preprocess_time.value() / num_test_data
    average_inference_time = inference_time.value() / num_test_data
    average_postprocess_time = postprocess_time.value() / num_test_data
    # Memory sums are divided by the sample count (not the batch count),
    # yielding per-sample figures — matches the log_print labels as written.
    cpu_rss = cpu_mem / num_test_data
    gpu_rss = gpu_mem / num_test_data
    gpu_util = gpu_util / num_test_data
    log_print(args, results_type, num_test_data, average_preprocess_time,
              average_inference_time, average_postprocess_time, cpu_rss,
              gpu_rss, gpu_util)
194+
94195

95196
if __name__ == '__main__':
96197
args = parse_args()

0 commit comments

Comments
 (0)