Skip to content

Commit ddf6ec2

Browse files
authored
fix demo (#213)
* fix demo * fix * fix * fix code style
1 parent b1f708f commit ddf6ec2

File tree

14 files changed

+342
-111
lines changed

14 files changed

+342
-111
lines changed

core/trainers/framework/runner.py

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import warnings
2020
import numpy as np
2121
import random
22+
import json
2223
import logging
2324
import paddle.fluid as fluid
2425

@@ -147,17 +148,22 @@ def _executor_dataloader_train(self, model_dict, context):
147148
metrics_format = []
148149

149150
if context["is_infer"]:
150-
metrics_format.append("\t[Infer]\t{}: {{}}".format("batch"))
151+
metrics_format.append("\t[Infer] {}: {{}}".format("batch"))
151152
else:
152-
metrics_format.append("\t[Train]\t{}: {{}}".format("batch"))
153+
metrics_format.append("\t[Train]")
154+
if "current_epoch" in context:
155+
metrics_format.append(" epoch: {}".format(context[
156+
"current_epoch"]))
157+
metrics_format.append(" {}: {{}}".format("batch"))
153158

154159
metrics_format.append("{}: {{:.2f}}s".format("time_each_interval"))
155160

156161
metrics_names = ["total_batch"]
157-
162+
metrics_indexes = dict()
158163
for name, var in metrics.items():
159164
metrics_names.append(name)
160165
metrics_varnames.append(var.name)
166+
metrics_indexes[var.name] = len(metrics_varnames) - 1
161167
metrics_format.append("{}: {{}}".format(name))
162168
metrics_format = ", ".join(metrics_format)
163169

@@ -166,6 +172,7 @@ def _executor_dataloader_train(self, model_dict, context):
166172
batch_id = 0
167173
begin_time = time.time()
168174
scope = context["model"][model_name]["scope"]
175+
runner_results = []
169176
result = None
170177
with fluid.scope_guard(scope):
171178
try:
@@ -182,18 +189,35 @@ def _executor_dataloader_train(self, model_dict, context):
182189
]
183190
metrics.extend(metrics_rets)
184191

192+
batch_runner_result = {}
193+
for k, v in metrics_indexes.items():
194+
batch_runner_result[k] = np.array(metrics_rets[
195+
v]).tolist()
196+
runner_results.append(batch_runner_result)
197+
185198
if batch_id % fetch_period == 0 and batch_id != 0:
186199
end_time = time.time()
187200
seconds = end_time - begin_time
188201
metrics_logging = metrics[:]
189202
metrics_logging = metrics.insert(1, seconds)
190203
begin_time = end_time
191-
192204
logging.info(metrics_format.format(*metrics))
193205
batch_id += 1
194206
except fluid.core.EOFException:
195207
reader.reset()
196208

209+
runner_result_save_path = envs.get_global_env(
210+
"runner." + context["runner_name"] + ".runner_result_dump_path",
211+
None)
212+
if runner_result_save_path:
213+
if "current_epoch" in context:
214+
runner_result_save_path = runner_result_save_path + "_epoch_{}".format(
215+
context["current_epoch"])
216+
logging.info("Dump runner result in {}".format(
217+
runner_result_save_path))
218+
with open(runner_result_save_path, 'w+') as fout:
219+
json.dump(runner_results, fout)
220+
197221
if batch_id > 0:
198222
result = dict(zip(metrics_names, metrics))
199223
return result
@@ -402,6 +426,7 @@ def run(self, context):
402426
filelist = context["file_list"]
403427
context["file_list"] = shuffle_files(need_shuffle_files,
404428
filelist)
429+
context["current_epoch"] = epoch
405430
begin_time = time.time()
406431
result = self._run(context, model_dict)
407432
end_time = time.time()
@@ -450,6 +475,7 @@ def run(self, context):
450475
filelist = context["file_list"]
451476
context["file_list"] = shuffle_files(need_shuffle_files,
452477
filelist)
478+
context["current_epoch"] = epoch
453479
begin_time = time.time()
454480
result = self._run(context, model_dict)
455481
end_time = time.time()
@@ -500,6 +526,7 @@ def run(self, context):
500526
filelist = context["file_list"]
501527
context["file_list"] = shuffle_files(need_shuffle_files,
502528
filelist)
529+
context["current_epoch"] = epoch
503530
begin_time = time.time()
504531
self._run(context, model_dict)
505532
end_time = time.time()
@@ -533,6 +560,7 @@ def run(self, context):
533560
filelist = context["file_list"]
534561
context["file_list"] = shuffle_files(need_shuffle_files,
535562
filelist)
563+
context["current_epoch"] = epoch
536564
begin_time = time.time()
537565
self._run(context, model_dict)
538566
end_time = time.time()

doc/yaml.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
| runner_class_path | string | 路径 || 自定义runner流程实现的地址 |
3939
| terminal_class_path | string | 路径 || 自定义terminal流程实现的地址 |
4040
| init_pretraining_model_path | string | 路径 ||自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
41+
| runner_result_dump_path | string | 路径 || 运行中metrics的结果使用json.dump到文件的地址,若是在训练的runner中使用, 会自动加上epoch后缀 |
4142

4243

4344

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# PaddleRec 基于 Movielens 数据集的全流程示例
2+
3+
## 模型的详细教程可以查阅: [十分钟!全流程!从零搭建推荐系统](https://aistudio.baidu.com/aistudio/projectdetail/559336)
4+
5+
## 本地运行流程
6+
7+
在本地需要安装`PaddleRec`与`PaddlePaddle`,推荐在`Linux` + `python2.7` 环境下执行此demo
8+
9+
本地运行流程与AiStudio流程基本一致,细节略有区别
10+
11+
### 离线训练
12+
```shell
13+
sh train.sh
14+
```
15+
16+
### 离线测试
17+
```shell
18+
sh offline_test.sh
19+
```
20+
21+
### 模拟在线召回
22+
```shell
23+
sh online_recall.sh
24+
```
25+
26+
### 模拟在线排序
27+
```shell
28+
sh online_rank.sh
29+
```
Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,25 @@
11
cd data
22

3+
echo "---> Download movielens 1M data ..."
34
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
5+
echo "---> Unzip ml-1m.zip ..."
46
unzip ml-1m.zip
7+
rm ml-1m.zip
58

9+
echo "---> Split movielens data ..."
610
python split.py
711

8-
mkdir train/
9-
mkdir test/
12+
mkdir -p train/
13+
mkdir -p test/
1014

15+
echo "---> Process train & test data ..."
1116
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
1217
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
1318
python process_ml_1m.py hash log.data.train > ./train/data.txt
1419
python process_ml_1m.py hash log.data.test > ./test/data.txt
1520

1621
rm log.data.train
1722
rm log.data.test
18-
cd ../
23+
cd ..
24+
25+
echo "---> Finish data process"
Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
## modify config.yaml to infer mode at first
22

3-
cd recall
4-
python -m paddlerec.run -m ./config.yaml
5-
cd ../rank
6-
python -m paddlerec.run -m ./config.yaml
7-
cd ..
3+
echo "Recall offline test ..."
4+
echo "Model config at models/demo/movie_recommand/recall/config_test_offline.yaml"
5+
python -m paddlerec.run -m ./recall/config_test_offline.yaml
6+
7+
echo "Rank offline test ..."
8+
echo "Model config at models/demo/movie_recommand/rank/config_test_offline.yaml"
9+
python -m paddlerec.run -m ./rank/config_test_offline.yaml
810

911
echo "recall offline test result:"
1012
python parse.py recall_offline recall/infer_result
13+
1114
echo "rank offline test result:"
1215
python parse.py rank_offline rank/infer_result
Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
cd data
2+
echo "Create online test data ..."
23
python process_ml_1m.py data_rank > online_user/test/data.txt
34

4-
## modify recall/config.yaml to online_infer mode
5-
cd ../rank
6-
python -m paddlerec.run -m ./config.yaml
7-
cd ../
8-
python parse.py rank_online rank/infer_result
5+
cd ..
6+
echo "Rank online test ..."
7+
echo "Model config at models/demo/movie_recommand/rank/config_test_online.yaml"
8+
python -m paddlerec.run -m ./rank/config_test_online.yaml
9+
python parse.py rank_online ./rank/infer_result
Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
cd data
2+
echo "Create online test data ..."
23
mkdir online_user/test
34
python process_ml_1m.py data_recall > online_user/test/data.txt
45

5-
## modify recall/config.yaml to online_infer mode
6-
cd ../recall
7-
python -m paddlerec.run -m ./config.yaml
8-
cd ../
6+
cd ..
7+
echo "Recall online test ..."
8+
echo "Model config at models/demo/movie_recommand/recall/config_test_online.yaml"
9+
python -m paddlerec.run -m ./recall/config_test_online.yaml
910
python parse.py recall_online recall/infer_result

models/demo/movie_recommand/rank/config.yaml

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
workspace: "models/demo/movie_recommand"
15+
workspace: "./"
1616

1717
# list of dataset
1818
dataset:
1919
- name: dataset_train # name of dataset to distinguish different datasets
2020
batch_size: 128
21-
type: QueueDataset
21+
type: DataLoader
2222
data_path: "{workspace}/data/train"
2323
sparse_slots: "logid time userid gender age occupation movieid title genres label"
2424
dense_slots: ""
25-
- name: dataset_infer # name
26-
batch_size: 128
27-
type: DataLoader
28-
data_path: "{workspace}/data/test"
29-
sparse_slots: "logid time userid gender age occupation movieid title genres label"
30-
dense_slots: ""
31-
- name: dataset_online_infer # name
32-
batch_size: 10
33-
type: DataLoader
34-
data_path: "{workspace}/data/online_user/test"
35-
sparse_slots: "logid time userid gender age occupation movieid title genres label"
36-
dense_slots: ""
3725

3826
# hyper parameters of user-defined network
3927
hyper_parameters:
@@ -51,42 +39,17 @@ hyper_parameters:
5139
# train
5240
mode: runner_train
5341

54-
## online or offline infer
55-
#mode: runner_infer
5642
runner:
5743
- name: runner_train
5844
class: train
5945
save_checkpoint_interval: 1 # save model interval of epochs
60-
save_inference_interval: 1 # save inference
61-
save_checkpoint_path: "increment" # save checkpoint path
62-
save_inference_path: "inference" # save inference path
46+
save_checkpoint_path: "increment_rank" # save checkpoint path
6347
epochs: 10
6448
device: cpu
6549

66-
- name: runner_infer
67-
class: infer
68-
print_interval: 10000
69-
init_model_path: "increment/9" # load model path
70-
7150
#train
7251
phase:
7352
- name: phase1
74-
model: "{workspace}/model.py" # user-defined model
53+
model: "{workspace}/rank/model.py" # user-defined model
7554
dataset_name: dataset_train # select dataset by name
76-
thread_num: 12
77-
78-
##offline infer
79-
#phase:
80-
#- name: phase1
81-
# model: "{workspace}/model.py" # user-defined model
82-
# dataset_name: dataset_infer # select dataset by name
83-
# save_path: "./infer_result"
84-
# thread_num: 1
85-
86-
##offline infer
87-
#phase:
88-
#- name: phase1
89-
# model: "{workspace}/model.py" # user-defined model
90-
# dataset_name: dataset_online_infer # select dataset by name
91-
# save_path: "./infer_result"
92-
# thread_num: 1
55+
thread_num: 4
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
#workspace: "paddlerec.models.demo.movie_recommand"
16+
workspace: "./"
17+
18+
# list of dataset
19+
dataset:
20+
- name: dataset_infer # name
21+
batch_size: 128
22+
type: DataLoader
23+
data_path: "{workspace}/data/test"
24+
sparse_slots: "logid time userid gender age occupation movieid title genres label"
25+
dense_slots: ""
26+
27+
# hyper parameters of user-defined network
28+
hyper_parameters:
29+
# optimizer config
30+
optimizer:
31+
class: Adam
32+
learning_rate: 0.001
33+
strategy: async
34+
# user-defined <key, value> pairs
35+
sparse_feature_number: 60000000
36+
sparse_feature_dim: 9
37+
dense_input_dim: 13
38+
fc_sizes: [512, 256, 128, 32]
39+
40+
# train
41+
mode: runner_infer
42+
43+
## online or offline infer
44+
#mode: runner_infer
45+
runner:
46+
- name: runner_infer
47+
epochs: 1
48+
device: cpu
49+
class: infer
50+
print_interval: 10000
51+
runner_result_dump_path: "{workspace}/rank/infer_result"
52+
init_model_path: "increment_rank/9" # load model path
53+
54+
#offline infer
55+
phase:
56+
- name: phase1
57+
model: "{workspace}/rank/model.py" # user-defined model
58+
dataset_name: dataset_infer # select dataset by name
59+
thread_num: 1
60+

0 commit comments

Comments
 (0)