
Commit 7d90b40

Merge pull request #484 from yinhaofeng/benchmark
benchmark
2 parents 55772ca + 0343146 commit 7d90b40

File tree: 6 files changed (+106, −105 lines)


models/rank/deepfm/config.yaml

Lines changed: 9 additions & 4 deletions
@@ -16,21 +16,26 @@
 runner:
   train_data_dir: "data/sample_data/train"
   train_reader_path: "criteo_reader" # importlib format
-  use_gpu: True
+  use_gpu: False
   use_auc: True
   train_batch_size: 2
   epochs: 3
   print_interval: 2
-  #model_init_path: "output_model/0" # init model
+  # model_init_path: "output_model_deepfm/2" # init model
   model_save_path: "output_model_deepfm"
   test_data_dir: "data/sample_data/train"
   infer_reader_path: "criteo_reader" # importlib format
   infer_batch_size: 5
   infer_load_path: "output_model_deepfm"
   infer_start_epoch: 0
   infer_end_epoch: 3
-
-
+  #use inference save model
+  use_inference: False
+  save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"]
+  save_inference_fetch_varnames: ["sigmoid_0.tmp_0"]
+  # use fleet
+  use_fleet: False
+
 # hyper parameters of user-defined network
 hyper_parameters:
   # optimizer config
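Note: the new use_inference, save_inference_feed_varnames, and save_inference_fetch_varnames keys describe the inference-format model this runner can save; the feed names (C1–C26 plus dense_input) become the predictor's input handles and the fetch name its output handle, which is exactly how tools/paddle_infer.py looks them up later in this commit. A minimal sketch of that correspondence, assuming a predictor already built from the exported model (the "predictor" and "batch" objects are illustrative):

# Sketch: how the varnames in this config map onto Paddle inference handles.
# The handle calls below are the same ones tools/paddle_infer.py uses.
feed_varnames = ["C%d" % i for i in range(1, 27)] + ["dense_input"]
fetch_varnames = ["sigmoid_0.tmp_0"]

def run_once(predictor, batch):
    for name in feed_varnames:
        handle = predictor.get_input_handle(name)  # input named in config
        handle.copy_from_cpu(batch[name])          # one numpy array per feature
    predictor.run()
    out = predictor.get_output_handle(fetch_varnames[0])
    return out.copy_to_cpu()                       # sigmoid probability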

models/rank/deepfm/criteo_reader.py

Lines changed: 24 additions & 2 deletions
@@ -14,14 +14,33 @@
 
 from __future__ import print_function
 import numpy as np
-
+import paddle
 from paddle.io import IterableDataset
 
 
 class RecDataset(IterableDataset):
     def __init__(self, file_list, config):
         super(RecDataset, self).__init__()
         self.file_list = file_list
+        if config:
+            use_fleet = config.get("runner.use_fleet", False)
+            self.inference = config.get("runner.inference", False)
+        else:
+            use_fleet = False
+        if use_fleet:
+            worker_id = paddle.distributed.get_rank()
+            worker_num = paddle.distributed.get_world_size()
+            file_num = len(file_list)
+            if file_num < worker_num:
+                raise ValueError(
+                    "The number of data files is less than the number of workers"
+                )
+            blocksize = int(file_num / worker_num)
+            self.file_list = file_list[worker_id * blocksize:(worker_id + 1) *
+                                       blocksize]
+            remainder = file_num - (blocksize * worker_num)
+            if worker_id < remainder:
+                self.file_list.append(file_list[-(worker_id + 1)])
         self.init()
 
     def init(self):
@@ -78,4 +97,7 @@ def __iter__(self):
                 output_list.append(
                     np.array(output[-1][1]).astype("float32"))
             # list
-            yield output_list
+            if self.inference:
+                yield output_list[1:]
+            else:
+                yield output_list
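Note: with use_fleet on, RecDataset shards the file list itself: every worker takes a contiguous block of file_num // worker_num files, and the remainder files at the tail of the list are handed out one each to the lowest-ranked workers. A standalone replay of that arithmetic (no paddle needed), distributing 10 files over 4 workers:

# Replay of the sharding added to RecDataset.__init__, with worker_id and
# worker_num spelled out instead of paddle.distributed.get_rank()/get_world_size().
file_list = ["part-%d" % i for i in range(10)]
worker_num = 4
for worker_id in range(worker_num):
    blocksize = int(len(file_list) / worker_num)         # 2
    shard = file_list[worker_id * blocksize:(worker_id + 1) * blocksize]
    remainder = len(file_list) - blocksize * worker_num  # 2
    if worker_id < remainder:                            # workers 0 and 1 get one extra
        shard.append(file_list[-(worker_id + 1)])
    print(worker_id, shard)
# 0 ['part-0', 'part-1', 'part-9']
# 1 ['part-2', 'part-3', 'part-8']
# 2 ['part-4', 'part-5']
# 3 ['part-6', 'part-7']

One caveat visible in the diff: self.inference is only assigned when a config dict is passed, so callers that construct RecDataset without one rely on never reaching the inference branch in __iter__.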

models/rank/wide_deep/config.yaml

Lines changed: 4 additions & 4 deletions
@@ -20,16 +20,16 @@ runner:
   use_gpu: False
   use_auc: True
   train_batch_size: 50
-  epochs: 3
+  epochs: 4
   print_interval: 2
-  # model_init_path: "output_model_wide_deep/2" # init model
+  # model_init_path: "models/rank/wide_deep/output_model_wide_deep/2" # init model
   model_save_path: "output_model_wide_deep"
   test_data_dir: "data/sample_data/train"
   infer_reader_path: "criteo_reader" # importlib format
   infer_batch_size: 5
   infer_load_path: "output_model_wide_deep"
-  infer_start_epoch: 2
-  infer_end_epoch: 3
+  infer_start_epoch: 3
+  infer_end_epoch: 4
   #use inference save model
   use_inference: False
   save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"]

models/rank/wide_deep/config_bigdata.yaml

Lines changed: 8 additions & 2 deletions
@@ -15,7 +15,7 @@
 # global settings
 
 runner:
-  #train_data_dir: "data/slot_train_data_full"
+  #train_data_dir: "data/slot_train_data_full"
   train_data_dir: "../../../datasets/criteo/slot_train_data_full"
   train_reader_path: "criteo_reader" # importlib format
   use_gpu: True
@@ -29,8 +29,14 @@ runner:
   infer_reader_path: "criteo_reader" # importlib format
   infer_batch_size: 512
   infer_load_path: "output_model_all_wide_deep"
-  infer_start_epoch: 0
+  infer_start_epoch: 3
   infer_end_epoch: 4
+  #use inference save model
+  use_inference: False
+  save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"]
+  save_inference_fetch_varnames: ["sigmoid_0.tmp_0"]
+  #use fleet
+  use_fleet: False
 
 # hyper parameters of user-defined network
 hyper_parameters:

tools/paddle_infer.py

Lines changed: 49 additions & 93 deletions
@@ -19,19 +19,16 @@
 import time
 import logging
 import sys
+import re
 from importlib import import_module
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
 from utils.utils_single import load_yaml, load_dy_model_class, get_abs_model
 from utils.save_load import save_model, load_model
-from utils.benchmark_utils import PaddleInferBenchmark
 from paddle.io import DistributedBatchSampler, DataLoader
 import argparse
 from paddle.inference import Config
 from paddle.inference import create_predictor
-import pynvml
-import psutil
-import GPUtil
 
 
 def parse_args():
@@ -47,18 +44,35 @@ def parse_args():
     parser.add_argument("--cpu_threads", type=int, default=1)
     parser.add_argument("--enable_mkldnn", type=str, default="False")
     parser.add_argument("--enable_tensorRT", type=str, default="False")
+    parser.add_argument("--benchmark", type=str, default="True")
+    parser.add_argument("--save_log_path", type=str, default="./output")
+    parser.add_argument("--precision", type=str)
     args = parser.parse_args()
     args.use_gpu = (True if args.use_gpu.lower() == "true" else False)
     args.enable_mkldnn = (True
                           if args.enable_mkldnn.lower() == "true" else False)
     args.enable_tensorRT = (True if args.enable_tensorRT.lower() == "true" else
                             False)
+    args.benchmark = (True if args.benchmark.lower() == "true" else False)
     return args
 
 
 def init_predictor(args):
     if args.model_dir:
-        config = Config(args.model_dir)
+        has_model = 0
+        pdmodel_name = 0
+        pdiparams_name = 0
+        for file_name in os.listdir(args.model_dir):
+            if re.search("__model__", file_name):
+                has_model = 1
+            if file_name.endswith(".pdmodel"):
+                pdmodel_name = os.path.join(args.model_dir, file_name)
+            if file_name.endswith(".pdiparams"):
+                pdiparams_name = os.path.join(args.model_dir, file_name)
+        if has_model == 1:
+            config = Config(args.model_dir)
+        elif pdmodel_name and pdiparams_name:
+            config = Config(pdmodel_name, pdiparams_name)
     else:
         config = Config(args.model_file, args.params_file)
 
@@ -67,7 +81,7 @@ def init_predictor(args):
     if args.enable_tensorRT:
         config.enable_tensorrt_engine(
             max_batch_size=args.batchsize,
-            min_subgraph_size=1,
+            min_subgraph_size=9,
             precision_mode=paddle.inference.PrecisionType.Float32)
     else:
         config.disable_gpu()
@@ -89,122 +103,64 @@ def create_data_loader(args):
     sys.path.append(reader_path)
     #sys.path.append(os.path.abspath("."))
     reader_class = import_module(reader_file)
-    config = {"inference": True}
+    config = {"runner.inference": True}
     dataset = reader_class.RecDataset(file_list, config=config)
     loader = DataLoader(
         dataset, batch_size=batchsize, places=place, drop_last=True)
     return loader
 
 
-class Times(object):
-    def __init__(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-
-    def start(self):
-        self.st = time.time()
-
-    def end(self, accumulative=True):
-        self.et = time.time()
-        if accumulative:
-            self.time += self.et - self.st
-        else:
-            self.time = self.et - self.st
-
-    def reset(self):
-        self.time = 0.
-        self.st = 0.
-        self.et = 0.
-
-    def value(self):
-        return round(self.time, 4)
-
-
-def get_current_memory_mb(gpu_id=None):
-    pid = os.getpid()
-    p = psutil.Process(pid)
-    info = p.memory_full_info()
-    cpu_mem = info.uss / 1024. / 1024.
-    gpu_mem = 0
-    gpu_precent = 0
-    if gpu_id is not None:
-        GPUs = GPUtil.getGPUs()
-        gpu_load = GPUs[gpu_id].load
-        gpu_precent = gpu_load
-        pynvml.nvmlInit()
-        handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-        meminfo = pynvml.nvmlDeviceGetMemoryInfo(handle)
-        gpu_mem = meminfo.used / 1024. / 1024.
-    return cpu_mem, gpu_mem, gpu_precent
-
-
 def main(args):
     predictor, pred_config = init_predictor(args)
     place = paddle.set_device('gpu' if args.use_gpu else 'cpu')
     args.place = place
     input_names = predictor.get_input_names()
     output_names = predictor.get_output_names()
     test_dataloader = create_data_loader(args)
-    preprocess_time = Times()
-    inference_time = Times()
-    postprocess_time = Times()
-    cpu_mem, gpu_mem = 0, 0
-    gpu_id = 0
-    gpu_util = 0
+
+    if args.benchmark:
+        import auto_log
+        pid = os.getpid()
+        autolog = auto_log.AutoLogger(
+            model_name=args.model_name,
+            model_precision=args.precision,
+            batch_size=args.batchsize,
+            data_shape="dynamic",
+            save_path=args.save_log_path,
+            inference_config=pred_config,
+            pids=pid,
+            process_name=None,
+            gpu_ids=0,
+            time_keys=[
+                'preprocess_time', 'inference_time', 'postprocess_time'
+            ])
+
     for batch_id, batch_data in enumerate(test_dataloader):
         name_data_pair = dict(zip(input_names, batch_data))
-        preprocess_time.start()
+        if args.benchmark:
+            autolog.times.start()
         for name in input_names:
             input_tensor = predictor.get_input_handle(name)
             input_tensor.copy_from_cpu(name_data_pair[name].numpy())
-        preprocess_time.end(accumulative=True)
-        inference_time.start()
+        if args.benchmark:
+            autolog.times.stamp()
         predictor.run()
         for name in output_names:
            output_tensor = predictor.get_output_handle(name)
            output_data = output_tensor.copy_to_cpu()
-        inference_time.end(accumulative=True)
         results = []
         results_type = []
-        postprocess_time.start()
+        if args.benchmark:
+            autolog.times.stamp()
         for name in output_names:
             results_type.append(output_tensor.type())
             results.append(output_data[0])
-        postprocess_time.end(accumulative=True)
-        cm, gm, gu = get_current_memory_mb(gpu_id)
-        cpu_mem += cm
-        gpu_mem += gm
-        gpu_util += gu
+        if args.benchmark:
+            autolog.times.end(stamp=True)
         print(results)
 
-    num_test_data = args.batchsize * (batch_id + 1)
-    average_preprocess_time = preprocess_time.value() / (batch_id + 1)
-    average_inference_time = inference_time.value() / (batch_id + 1)
-    average_postprocess_time = postprocess_time.value() / (batch_id + 1)
-    cpu_rss = cpu_mem / (batch_id + 1)
-    gpu_rss = gpu_mem / (batch_id + 1)
-    gpu_util = gpu_util / (batch_id + 1)
-
-    perf_info = {
-        'inference_time_s': average_inference_time,
-        'preprocess_time_s': average_preprocess_time,
-        'postprocess_time_s': average_postprocess_time
-    }
-    model_info = {'model_name': args.model_name, 'precision': "fp32"}
-    data_info = {
-        'batch_size': args.batchsize,
-        'shape': "dynamic_shape",
-        'data_num': num_test_data
-    }
-    resource_info = {
-        'cpu_rss_mb': cpu_rss,
-        'gpu_rss_mb': gpu_rss,
-        'gpu_util': gpu_util
-    }
-    rec_log = PaddleInferBenchmark(pred_config, model_info, data_info,
-                                   perf_info, resource_info)
-    rec_log('Rec')
+    if args.benchmark:
+        autolog.report()
 
 
 if __name__ == '__main__':
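Note: the hand-rolled Times class, get_current_memory_mb, and PaddleInferBenchmark reporting are replaced by the auto_log package, which owns timing, resource sampling, and log output. The lifecycle shown above is: build one AutoLogger per run, call times.start() at the top of each batch, times.stamp() after preprocessing and again after inference, times.end(stamp=True) after postprocessing, and report() once at the end. A minimal sketch of that lifecycle using only the calls that appear in this diff (argument values are placeholders; auto_log is PaddlePaddle's separate AutoLog benchmarking package and must be installed):

import os
import auto_log  # PaddlePaddle's AutoLog benchmarking package

autolog = auto_log.AutoLogger(
    model_name="rec_model",         # placeholder name
    model_precision="fp32",
    batch_size=5,
    data_shape="dynamic",
    save_path="./output",
    inference_config=None,          # pass the real paddle.inference Config here
    pids=os.getpid(),
    process_name=None,
    gpu_ids=0,
    time_keys=['preprocess_time', 'inference_time', 'postprocess_time'])

for _ in range(3):                  # stand-in for the dataloader loop
    autolog.times.start()           # batch begins: preprocess interval opens
    autolog.times.stamp()           # preprocess done, inference interval opens
    autolog.times.stamp()           # inference done, postprocess interval opens
    autolog.times.end(stamp=True)   # postprocess done
autolog.report()                    # write the summary log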

tools/to_static.py

Lines changed: 12 additions & 0 deletions
@@ -37,6 +37,7 @@
 def parse_args():
     parser = argparse.ArgumentParser(description='paddle-rec run')
     parser.add_argument("-m", "--config_yaml", type=str)
+    parser.add_argument("-o", "--opt", nargs='*', type=str)
     args = parser.parse_args()
     args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
     args.config_yaml = get_abs_model(args.config_yaml)
@@ -49,6 +50,17 @@ def main(args):
     config = load_yaml(args.config_yaml)
     dy_model_class = load_dy_model_class(args.abs_dir)
     config["config_abs_dir"] = args.abs_dir
+    # modify config from command
+    if args.opt:
+        for parameter in args.opt:
+            parameter = parameter.strip()
+            key, value = parameter.split("=")
+            if type(config.get(key)) is int:
+                value = int(value)
+            if type(config.get(key)) is bool:
+                value = (True if value.lower() == "true" else False)
+            config[key] = value
+
     # tools.vars
     use_gpu = config.get("runner.use_gpu", True)
     train_data_dir = config.get("runner.train_data_dir", None)
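Note: the new -o/--opt flag lets key=value pairs given on the command line override values loaded from the YAML file, e.g. -o runner.use_gpu=false runner.epochs=4; each override is coerced to int or bool when the existing config value has that exact type, and kept as a string otherwise. A standalone replay of the override loop on a toy config:

# Same coercion logic as the block added to main(), run on sample values.
config = {"runner.use_gpu": True, "runner.epochs": 3}
opts = ["runner.use_gpu=false", "runner.epochs=4"]

for parameter in opts:
    parameter = parameter.strip()
    key, value = parameter.split("=")
    if type(config.get(key)) is int:   # bools are not matched here, since
        value = int(value)             # type() comparison is exact
    if type(config.get(key)) is bool:
        value = (True if value.lower() == "true" else False)
    config[key] = value

print(config)  # {'runner.use_gpu': False, 'runner.epochs': 4}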
