Skip to content

Commit 707ca42

Browse files
authored
Merge branch 'master' into fix_setup_print
2 parents 94e15d2 + 7a35959 commit 707ca42

File tree

39 files changed

+138
-119
lines changed

39 files changed

+138
-119
lines changed

core/trainers/framework/dataset.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@
1515
from __future__ import print_function
1616

1717
import os
18-
import warnings
1918

2019
import paddle.fluid as fluid
2120
from paddlerec.core.utils import envs
2221
from paddlerec.core.utils import dataloader_instance
2322
from paddlerec.core.reader import SlotReader
2423
from paddlerec.core.trainer import EngineMode
24+
from paddlerec.core.utils.util import split_files
2525

2626
__all__ = ["DatasetBase", "DataLoader", "QueueDataset"]
2727

@@ -123,7 +123,8 @@ def _get_dataset(self, dataset_name, context):
123123
for x in os.listdir(train_data_path)
124124
]
125125
if context["engine"] == EngineMode.LOCAL_CLUSTER:
126-
file_list = context["fleet"].split_files(file_list)
126+
file_list = split_files(file_list, context["fleet"].worker_index(),
127+
context["fleet"].worker_num())
127128

128129
dataset.set_filelist(file_list)
129130
for model_dict in context["phases"]:

core/utils/dataloader_instance.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from paddlerec.core.utils.envs import get_runtime_environ
2020
from paddlerec.core.reader import SlotReader
2121
from paddlerec.core.trainer import EngineMode
22+
from paddlerec.core.utils.util import split_files
2223

2324

2425
def dataloader_by_name(readerclass,
@@ -39,7 +40,8 @@ def dataloader_by_name(readerclass,
3940

4041
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
4142
if context["engine"] == EngineMode.LOCAL_CLUSTER:
42-
files = context["fleet"].split_files(files)
43+
files = split_files(files, context["fleet"].worker_index(),
44+
context["fleet"].worker_num())
4345
print("file_list : {}".format(files))
4446

4547
reader = reader_class(yaml_file)
@@ -80,7 +82,8 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
8082

8183
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
8284
if context["engine"] == EngineMode.LOCAL_CLUSTER:
83-
files = context["fleet"].split_files(files)
85+
files = split_files(files, context["fleet"].worker_index(),
86+
context["fleet"].worker_num())
8487
print("file_list: {}".format(files))
8588

8689
sparse = get_global_env(name + "sparse_slots", "#")
@@ -133,7 +136,8 @@ def slotdataloader(readerclass, train, yaml_file, context):
133136

134137
files = [str(data_path) + "/%s" % x for x in os.listdir(data_path)]
135138
if context["engine"] == EngineMode.LOCAL_CLUSTER:
136-
files = context["fleet"].split_files(files)
139+
files = split_files(files, context["fleet"].worker_index(),
140+
context["fleet"].worker_num())
137141
print("file_list: {}".format(files))
138142

139143
sparse = get_global_env("sparse_slots", "#", namespace)

core/utils/envs.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
import os
1919
import socket
2020
import sys
21+
import six
2122
import traceback
23+
import six
2224

2325
global_envs = {}
2426
global_envs_flatten = {}
@@ -101,6 +103,12 @@ def fatten_env_namespace(namespace_nests, local_envs):
101103
name = ".".join(["dataset", dataset["name"], "type"])
102104
global_envs[name] = "DataLoader"
103105

106+
if get_platform() == "LINUX" and six.PY3:
107+
print("QueueDataset can not support PY3, change to DataLoader")
108+
for dataset in envs["dataset"]:
109+
name = ".".join(["dataset", dataset["name"], "type"])
110+
global_envs[name] = "DataLoader"
111+
104112

105113
def get_global_env(env_name, default_value=None, namespace=None):
106114
"""
@@ -253,11 +261,19 @@ def load_yaml(config):
253261
use_full_loader = False
254262

255263
if os.path.isfile(config):
256-
with open(config, 'r') as rb:
257-
if use_full_loader:
258-
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
259-
else:
260-
_config = yaml.load(rb.read())
261-
return _config
264+
if six.PY2:
265+
with open(config, 'r') as rb:
266+
if use_full_loader:
267+
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
268+
else:
269+
_config = yaml.load(rb.read())
270+
return _config
271+
else:
272+
with open(config, 'r', encoding="utf-8") as rb:
273+
if use_full_loader:
274+
_config = yaml.load(rb.read(), Loader=yaml.FullLoader)
275+
else:
276+
_config = yaml.load(rb.read())
277+
return _config
262278
else:
263279
raise ValueError("config {} can not be supported".format(config))

core/utils/util.py

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,8 @@
1919
import numpy as np
2020
from paddle import fluid
2121

22-
from paddlerec.core.utils import fs as fs
23-
2422

2523
def save_program_proto(path, program=None):
26-
2724
if program is None:
2825
_program = fluid.default_main_program()
2926
else:
@@ -171,6 +168,39 @@ def print_cost(cost, params):
171168
return log_str
172169

173170

171+
def split_files(files, trainer_id, trainers):
    """
    Return the contiguous share of ``files`` assigned to one trainer.

    The file list is cut into ``trainers`` consecutive blocks whose sizes
    differ by at most one; the first ``len(files) % trainers`` trainers
    receive the extra file.

    example 1: files is [a, b, c, d, e] and trainers = 2, then trainer
    0 gets [a, b, c] and trainer 1 gets [d, e].
    example 2: files is [a, b] and trainers = 3, then trainer 0 gets
    [a], trainer 1 gets [b], trainer 2 gets [].

    Args:
        files(list): file list need to be read.
        trainer_id(int): zero-based index of the current trainer, must be
            in ``[0, trainers)``.
        trainers(int): total number of trainers sharing the file list.

    Returns:
        list: a new list holding the files that belong to this trainer
            (possibly empty when there are more trainers than files).

    Raises:
        TypeError: if ``files`` is not a list.
        ZeroDivisionError: if ``trainers`` is 0.
        IndexError: if ``trainer_id`` is outside ``[0, trainers)``.
    """
    if not isinstance(files, list):
        raise TypeError("files should be a list of file need to be read.")

    # Integer arithmetic only: avoids the float round-trip of
    # int(len(files) / trainers) under Python 3 true division.
    blocksize, remainder = divmod(len(files), trainers)

    # Reject ids outside the valid range explicitly. A negative id would
    # otherwise wrap around and silently hand back another trainer's files.
    if not 0 <= trainer_id < trainers:
        raise IndexError(
            "trainer_id {} is out of range for {} trainers".format(
                trainer_id, trainers))

    # Every trainer before this one owns `blocksize` files, and each of the
    # first `remainder` trainers owns one more, which fixes the start offset
    # without materialising the slices of all other trainers.
    begin = trainer_id * blocksize + min(trainer_id, remainder)
    end = begin + blocksize + (1 if trainer_id < remainder else 0)

    return files[begin:end]
202+
203+
174204
class CostPrinter(object):
175205
"""
176206
For count cost time && print cost log

models/contentunderstanding/readme.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ python -m paddlerec.run -m ./config.yaml # 自定义修改超参后,指定配
8686
# 修改对应模型的config.yaml, workspace配置为当前目录的绝对路径
8787
# 修改对应模型的config.yaml,mode配置infer_runner
8888
# 示例: mode: train_runner -> mode: infer_runner
89-
# infer_runner中 class配置为 class: single_infer
89+
# infer_runner中 class配置为 class: infer
9090
# 修改phase阶段为infer的配置,参照config注释
9191
9292
# 修改完config.yaml后 执行:
@@ -106,7 +106,7 @@ python -m paddlerec.run -m ./config.yaml # 自定义修改超参后,指定配
106106
# 修改对应模型的config.yaml, workspace配置为当前目录的绝对路径
107107
# 修改对应模型的config.yaml,mode配置infer_runner
108108
# 示例: mode: train_runner -> mode: infer_runner
109-
# infer_runner中 class配置为 class: single_infer
109+
# infer_runner中 class配置为 class: infer
110110
# 修改phase阶段为infer的配置,参照config注释
111111
112112
# 修改完config.yaml后 执行:

models/demo/movie_recommand/rank/config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ runner:
6464
device: cpu
6565

6666
- name: runner_infer
67-
epochs: 1
68-
class: single_infer
67+
class: infer
6968
print_interval: 10000
7069
init_model_path: "increment/9" # load model path
7170

models/demo/movie_recommand/recall/config.yaml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,7 @@ runner:
6464
device: cpu
6565

6666
- name: runner_infer
67-
epochs: 1
68-
class: single_infer
67+
class: infer
6968
print_interval: 10000
7069
init_model_path: "increment/9" # load model path
7170

models/match/dssm/config.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@ runner:
5656
init_model_path: "" # load model path
5757
print_interval: 2
5858
- name: infer_runner
59-
class: single_infer
60-
# num of epochs
61-
epochs: 1
59+
class: infer
6260
# device to run training or infer
6361
device: cpu
6462
print_interval: 1

models/match/multiview-simnet/config.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,7 @@ runner:
6363
init_model_path: "" # load model path
6464
print_interval: 1
6565
- name: infer_runner
66-
class: single_infer
67-
# num of epochs
68-
epochs: 1
66+
class: infer
6967
# device to run training or infer
7068
device: cpu
7169
print_interval: 1

models/match/readme.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ python -m paddlerec.run -m paddlerec.models.match.multiview-simnet # multiview-s
4343
# 修改对应模型的config.yaml, workspace配置为当前目录的绝对路径
4444
# 修改对应模型的config.yaml,mode配置infer_runner
4545
# 示例: mode: train_runner -> mode: infer_runner
46-
# infer_runner中 class配置为 class: single_infer
46+
# infer_runner中 class配置为 class: infer
4747
# 修改phase阶段为infer的配置,参照config注释
4848

4949
# 修改完config.yaml后 执行:

0 commit comments

Comments
 (0)