Skip to content

Commit b3417af

Browse files
authored
Merge pull request #794 from esythan/gpups
gpups
2 parents: ba26394 + 2ccd243; commit: b3417af

File tree

5 files changed: 27 additions and 8 deletions.

models/rank/dnn/config_gpubox.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ runner:
2727
model_save_path: "output_model_dnn_queue"
2828

2929
sync_mode: "gpubox"
30-
thread_num: 16
30+
thread_num: 30
3131
reader_type: "InmemoryDataset" # DataLoader / QueueDataset / RecDataset / InmemoryDataset
3232
pipe_command: "python3.7 models/rank/dnn/queuedataset_reader.py"
3333
dataset_debug: False
@@ -49,7 +49,7 @@ hyper_parameters:
4949
# user-defined <key, value> pairs
5050
sparse_inputs_slots: 27
5151
sparse_feature_number: 1024
52-
sparse_feature_dim: 11
52+
sparse_feature_dim: 9
5353
dense_input_dim: 13
5454
fc_sizes: [512, 256, 128, 32]
5555
distributed_embedding: 0

models/rank/dnn/net.py

File mode changed: 100644 → 100755
Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -64,17 +64,19 @@ def __init__(self,
6464
self.add_sublayer('act_%d' % i, act)
6565
self._mlp_layers.append(act)
6666

67-
def forward(self, sparse_inputs, dense_inputs):
67+
def forward(self, sparse_inputs, dense_inputs, show_click=None):
6868

6969
sparse_embs = []
7070
for s_input in sparse_inputs:
7171
if self.sync_mode == "gpubox":
7272
emb = paddle.fluid.contrib.sparse_embedding(
7373
input=s_input,
7474
size=[
75-
self.sparse_feature_number, self.sparse_feature_dim
75+
self.sparse_feature_number, self.sparse_feature_dim + 2
7676
],
7777
param_attr=paddle.ParamAttr(name="embedding"))
78+
emb = paddle.fluid.layers.continuous_value_model(
79+
emb, show_click, False)
7880
else:
7981
emb = self.embedding(s_input)
8082
emb = paddle.reshape(emb, shape=[-1, self.sparse_feature_dim])

models/rank/dnn/static_model.py

File mode changed: 100644 → 100755
Lines changed: 15 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,15 @@ def create_feeds(self, is_infer=False):
5151
shape=[None, self.dense_input_dim],
5252
dtype="float32")
5353

54+
# sparse_input_ids = [
55+
# paddle.static.data(
56+
# name="C" + str(i), shape=[None, 1], dtype="int64")
57+
# for i in range(1, self.sparse_inputs_slots)
58+
# ]
59+
5460
sparse_input_ids = [
5561
paddle.static.data(
56-
name="C" + str(i), shape=[None, 1], dtype="int64")
62+
name=str(i), shape=[None, 1], dtype="int64")
5763
for i in range(1, self.sparse_inputs_slots)
5864
]
5965

@@ -77,8 +83,15 @@ def net(self, input, is_infer=False):
7783
self.fc_sizes,
7884
sync_mode=self.sync_mode)
7985

86+
self.cast_label = paddle.cast(self.label_input, dtype='float32')
87+
ones = paddle.fluid.layers.fill_constant_batch_size_like(
88+
input=self.label_input, shape=[-1, 1], dtype="float32", value=1)
89+
show_click = paddle.cast(
90+
paddle.concat(
91+
[ones, self.cast_label], axis=1), dtype='float32')
92+
show_click.stop_gradient = True
8093
raw_predict_2d = dnn_model.forward(self.sparse_inputs,
81-
self.dense_input)
94+
self.dense_input, show_click)
8295

8396
predict_2d = paddle.nn.functional.softmax(raw_predict_2d)
8497

tools/static_gpubox_trainer.py

File mode changed: 100644 → 100755
Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -123,7 +123,10 @@ def run_worker(self):
123123
gpus_env = os.getenv("FLAGS_selected_gpus")
124124
self.PSGPU = paddle.fluid.core.PSGPU()
125125
gpuslot = [int(i) for i in range(1, self.model.sparse_inputs_slots)]
126+
gpu_mf_sizes = [self.model.sparse_feature_dim - 1] * (
127+
self.model.sparse_inputs_slots - 1)
126128
self.PSGPU.set_slot_vector(gpuslot)
129+
self.PSGPU.set_slot_dim_vector(gpu_mf_sizes)
127130
self.PSGPU.init_gpu_ps([int(s) for s in gpus_env.split(",")])
128131
opt_info = paddle.fluid.default_main_program()._fleet_opt
129132
if use_auc is True:
@@ -139,7 +142,6 @@ def run_worker(self):
139142
if sync_mode == "heter":
140143
self.heter_train_loop(epoch)
141144
elif sync_mode == "gpubox":
142-
self.reader._set_use_ps_gpu(1)
143145
self.dataset_train_loop(epoch)
144146
elif reader_type == "QueueDataset":
145147
self.dataset_train_loop(epoch)
@@ -171,6 +173,7 @@ def run_worker(self):
171173
"Epoch: {}, using time {} second, ips {} {}/sec.".format(
172174
epoch, epoch_time, epoch_speed, self.count_method))
173175
self.train_result_dict["speed"].append(epoch_speed)
176+
self.PSGPU.end_pass()
174177

175178
model_dir = "{}/{}".format(save_model_path, epoch)
176179
if fleet.is_first_worker(
@@ -181,7 +184,6 @@ def run_worker(self):
181184
self.inference_target_var)
182185
fleet.barrier_worker()
183186
self.reader.release_memory()
184-
self.PSGPU.end_pass()
185187
logger.info("finish {} epoch training....".format(epoch))
186188
self.PSGPU.finalize()
187189

tools/utils/static_ps/reader_helper.py

File mode changed: 100644 → 100755
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,10 +261,12 @@ def __init__(self, input_var, file_list, config):
261261
self.fs_name = self.config.get("runner.fs_name", "")
262262
self.fs_ugi = self.config.get("runner.fs_ugi", "")
263263
print("hdfs config:", self.fs_name, self.fs_ugi)
264+
self.use_gpu = self.config.get("runner.use_gpu", False)
264265

265266
def get_reader(self):
266267
logger.info("Get InmemoryDataset")
267268
dataset = paddle.distributed.InMemoryDataset()
269+
dataset._set_use_ps_gpu(self.use_gpu)
268270
dataset.init(
269271
use_var=self.input_var,
270272
pipe_command=self.pipe_command,

0 commit comments

Comments
 (0)