Skip to content

Commit ad8074b

Browse files
committed
log normalization
1 parent 11722f2 commit ad8074b

File tree

4 files changed

+88
-26
lines changed

4 files changed

+88
-26
lines changed

tools/infer.py

Lines changed: 31 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@
5050
def parse_args():
5151
parser = argparse.ArgumentParser(description='paddle-rec run')
5252
parser.add_argument("-m", "--config_yaml", type=str)
53+
parser.add_argument("--device", type=str)
5354
args = parser.parse_args()
5455
args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
5556
args.config_yaml = get_abs_model(args.config_yaml)
@@ -63,19 +64,26 @@ def main(args):
6364
dy_model_class = load_dy_model_class(args.abs_dir)
6465
config["config_abs_dir"] = args.abs_dir
6566
# tools.vars
66-
use_gpu = config.get("runner.use_gpu", True)
67+
if args.device is None:
68+
use_gpu = config.get("runner.use_gpu", True)
69+
elif args.device == "gpu":
70+
use_gpu = True
71+
else:
72+
use_gpu = False
73+
6774
use_visual = config.get("runner.use_visual", False)
6875
test_data_dir = config.get("runner.test_data_dir", None)
6976
print_interval = config.get("runner.print_interval", None)
77+
infer_batch_size = config.get("runner.infer_batch_size", None)
7078
model_load_path = config.get("runner.infer_load_path", "model_output")
7179
start_epoch = config.get("runner.infer_start_epoch", 0)
7280
end_epoch = config.get("runner.infer_end_epoch", 10)
7381

7482
logger.info("**************common.configs**********")
7583
logger.info(
76-
"use_gpu: {}, use_visual: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
77-
format(use_gpu, use_visual, test_data_dir, start_epoch, end_epoch,
78-
print_interval, model_load_path))
84+
"use_gpu: {}, use_visual: {}, infer_batch_size: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
85+
format(use_gpu, use_visual, infer_batch_size, test_data_dir,
86+
start_epoch, end_epoch, print_interval, model_load_path))
7987
logger.info("**************common.configs**********")
8088

8189
place = paddle.set_device('gpu' if use_gpu else 'cpu')
@@ -105,12 +113,20 @@ def main(args):
105113
model_path = os.path.join(model_load_path, str(epoch_id))
106114
load_model(model_path, dy_model)
107115
dy_model.eval()
116+
infer_reader_cost = 0.0
117+
infer_run_cost = 0.0
118+
reader_start = time.time()
119+
108120
for batch_id, batch in enumerate(test_dataloader()):
121+
infer_reader_cost += time.time() - reader_start
122+
infer_start = time.time()
109123
batch_size = len(batch[0])
110124

111125
metric_list, tensor_print_dict = dy_model_class.infer_forward(
112126
dy_model, metric_list, batch, config)
113127

128+
infer_run_cost += time.time() - infer_start
129+
114130
if batch_id % print_interval == 0:
115131
tensor_print_str = ""
116132
if tensor_print_dict is not None:
@@ -133,13 +149,19 @@ def main(args):
133149
tag="infer/" + metric_list_name[metric_id],
134150
step=step_num,
135151
value=metric_list[metric_id].accumulate())
136-
logger.info("epoch: {}, batch_id: {}, ".format(
137-
epoch_id, batch_id) + metric_str + tensor_print_str +
138-
" speed: {:.2f} ins/s".format(
139-
print_interval * batch_size / (time.time(
140-
) - interval_begin)))
152+
logger.info(
153+
"epoch: {}, batch_id: {}, ".format(
154+
epoch_id, batch_id) + metric_str + tensor_print_str +
155+
" avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, speed: {:.2f} ins/s".
156+
format(infer_reader_cost / print_interval, (
157+
infer_reader_cost + infer_run_cost) / print_interval,
158+
print_interval * batch_size / (time.time() -
159+
interval_begin)))
141160
interval_begin = time.time()
161+
infer_reader_cost = 0.0
162+
infer_run_cost = 0.0
142163
step_num = step_num + 1
164+
reader_start = time.time()
143165

144166
metric_str = ""
145167
for metric_id in range(len(metric_list_name)):

tools/static_infer.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
def parse_args():
3636
parser = argparse.ArgumentParser("PaddleRec train static script")
3737
parser.add_argument("-m", "--config_yaml", type=str)
38+
parser.add_argument("--device", type=str)
3839
args = parser.parse_args()
3940
args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
4041
args.config_yaml = get_abs_model(args.config_yaml)
@@ -56,7 +57,12 @@ def main(args):
5657
fetch_vars = static_model_class.infer_net(input_data)
5758
logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
5859

59-
use_gpu = config.get("runner.use_gpu", True)
60+
if args.device is None:
61+
use_gpu = config.get("runner.use_gpu", True)
62+
elif args.device == "gpu":
63+
use_gpu = True
64+
else:
65+
use_gpu = False
6066
use_auc = config.get("runner.use_auc", False)
6167
use_visual = config.get("runner.use_visual", False)
6268
auc_num = config.get("runner.auc_num", 1)
@@ -69,9 +75,9 @@ def main(args):
6975
os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
7076
logger.info("**************common.configs**********")
7177
logger.info(
72-
"use_gpu: {}, use_visual: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
73-
format(use_gpu, use_visual, test_data_dir, start_epoch, end_epoch,
74-
print_interval, model_load_path))
78+
"use_gpu: {}, use_visual: {}, infer_batch_size: {}, test_data_dir: {}, start_epoch: {}, end_epoch: {}, print_interval: {}, model_load_path: {}".
79+
format(use_gpu, use_visual, batch_size, test_data_dir, start_epoch,
80+
end_epoch, print_interval, model_load_path))
7581
logger.info("**************common.configs**********")
7682

7783
place = paddle.set_device('gpu' if use_gpu else 'cpu')
@@ -98,13 +104,20 @@ def main(args):
98104

99105
epoch_begin = time.time()
100106
interval_begin = time.time()
107+
infer_reader_cost = 0.0
108+
infer_run_cost = 0.0
109+
reader_start = time.time()
110+
101111
if use_auc:
102112
reset_auc(auc_num)
103113
for batch_id, batch_data in enumerate(test_dataloader()):
114+
infer_reader_cost += time.time() - reader_start
115+
infer_start = time.time()
104116
fetch_batch_var = exe.run(
105117
program=paddle.static.default_main_program(),
106118
feed=dict(zip(input_data_names, batch_data)),
107119
fetch_list=[var for _, var in fetch_vars.items()])
120+
infer_run_cost += time.time() - infer_start
108121
if batch_id % print_interval == 0:
109122
metric_str = ""
110123
for var_idx, var_name in enumerate(fetch_vars):
@@ -115,11 +128,17 @@ def main(args):
115128
tag="infer/" + var_name,
116129
step=step_num,
117130
value=fetch_batch_var[var_idx][0])
118-
logger.info("epoch: {}, batch_id: {}, ".format(
119-
epoch_id, batch_id) + metric_str + "speed: {:.2f} ins/s".
120-
format(print_interval * batch_size / (time.time(
121-
) - interval_begin)))
131+
logger.info(
132+
"epoch: {}, batch_id: {}, ".format(epoch_id,
133+
batch_id) + metric_str +
134+
"avg_reader_cost: {:.5f} sec, avg_batch_cost: {:.5f} sec, speed: {:.2f} ins/s".
135+
format(infer_reader_cost / print_interval, (
136+
infer_reader_cost + infer_run_cost) / print_interval,
137+
print_interval * batch_size / (time.time() -
138+
interval_begin)))
122139
interval_begin = time.time()
140+
infer_reader_cost = 0.0
141+
infer_run_cost = 0.0
123142
reader_start = time.time()
124143
step_num = step_num + 1
125144

tools/static_trainer.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
def parse_args():
3737
parser = argparse.ArgumentParser("PaddleRec train static script")
3838
parser.add_argument("-m", "--config_yaml", type=str)
39+
parser.add_argument("--device", type=str)
3940
args = parser.parse_args()
4041
args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
4142
args.config_yaml = get_abs_model(args.config_yaml)
@@ -60,7 +61,12 @@ def main(args):
6061
logger.info("cpu_num: {}".format(os.getenv("CPU_NUM")))
6162
static_model_class.create_optimizer()
6263

63-
use_gpu = config.get("runner.use_gpu", True)
64+
if args.device is None:
65+
use_gpu = config.get("runner.use_gpu", True)
66+
elif args.device == "gpu":
67+
use_gpu = True
68+
else:
69+
use_gpu = False
6470
use_auc = config.get("runner.use_auc", False)
6571
use_visual = config.get("runner.use_visual", False)
6672
auc_num = config.get("runner.auc_num", 1)
@@ -74,9 +80,9 @@ def main(args):
7480
os.environ["CPU_NUM"] = str(config.get("runner.thread_num", 1))
7581
logger.info("**************common.configs**********")
7682
logger.info(
77-
"use_gpu: {}, use_visual: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
78-
format(use_gpu, use_visual, train_data_dir, epochs, print_interval,
79-
model_save_path))
83+
"use_gpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
84+
format(use_gpu, use_visual, batch_size, train_data_dir, epochs,
85+
print_interval, model_save_path))
8086
logger.info("**************common.configs**********")
8187

8288
place = paddle.set_device('gpu' if use_gpu else 'cpu')

tools/trainer.py

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
def parse_args():
5151
parser = argparse.ArgumentParser(description='paddle-rec run')
5252
parser.add_argument("-m", "--config_yaml", type=str)
53+
parser.add_argument("--device", type=str)
5354
args = parser.parse_args()
5455
args.abs_dir = os.path.dirname(os.path.abspath(args.config_yaml))
5556
args.config_yaml = get_abs_model(args.config_yaml)
@@ -64,19 +65,26 @@ def main(args):
6465
config["config_abs_dir"] = args.abs_dir
6566

6667
# tools.vars
67-
use_gpu = config.get("runner.use_gpu", True)
68+
if args.device is None:
69+
use_gpu = config.get("runner.use_gpu", True)
70+
elif args.device == "gpu":
71+
use_gpu = True
72+
else:
73+
use_gpu = False
74+
6875
use_visual = config.get("runner.use_visual", False)
6976
train_data_dir = config.get("runner.train_data_dir", None)
7077
epochs = config.get("runner.epochs", None)
7178
print_interval = config.get("runner.print_interval", None)
79+
train_batch_size = config.get("runner.train_batch_size", None)
7280
model_save_path = config.get("runner.model_save_path", "model_output")
7381
model_init_path = config.get("runner.model_init_path", None)
7482

7583
logger.info("**************common.configs**********")
7684
logger.info(
77-
"use_gpu: {}, use_visual: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
78-
format(use_gpu, use_visual, train_data_dir, epochs, print_interval,
79-
model_save_path))
85+
"use_gpu: {}, use_visual: {}, train_batch_size: {}, train_data_dir: {}, epochs: {}, print_interval: {}, model_save_path: {}".
86+
format(use_gpu, use_visual, train_batch_size, train_data_dir, epochs,
87+
print_interval, model_save_path))
8088
logger.info("**************common.configs**********")
8189

8290
place = paddle.set_device('gpu' if use_gpu else 'cpu')
@@ -168,8 +176,15 @@ def main(args):
168176
metric_list_name[metric_id] +
169177
": {:.6f},".format(metric_list[metric_id].accumulate()))
170178

179+
tensor_print_str = ""
180+
if tensor_print_dict is not None:
181+
for var_name, var in tensor_print_dict.items():
182+
tensor_print_str += (
183+
"{}:".format(var_name) + str(var.numpy()) + ",")
184+
171185
logger.info("epoch: {} done, ".format(epoch_id) + metric_str +
172-
"epoch time: {:.2f} s".format(time.time() - epoch_begin))
186+
tensor_print_str + " epoch time: {:.2f} s".format(
187+
time.time() - epoch_begin))
173188

174189
save_model(
175190
dy_model, optimizer, model_save_path, epoch_id, prefix='rec')

0 commit comments

Comments
 (0)