
Commit 9d7279b

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into refine_code

2 parents: bc81603 + 3000e99
8 files changed: +193 -87 lines


doc/v2/howto/cluster/multi_cluster/index_en.rst

Lines changed: 6 additions & 6 deletions
@@ -8,28 +8,28 @@ The user's cluster environment is not the same. To facilitate everyone's deploym
 .. toctree::
    :maxdepth: 1
 
-   k8s_cn.md
-   k8s_distributed_cn.md
+   k8s_en.md
+   k8s_distributed_en.md
 
 `OpenMPI <https://www.open-mpi.org>`_ is a mature high-performance parallel computing framework, which is widely used in the field of HPC. The following guide describes how to use OpenMPI to build PaddlePaddle's cluster training task:
 
 .. toctree::
    :maxdepth: 1
 
-   openmpi_cn.md
+   openmpi_en.md
 
 `Fabric <http://www.fabfile.org>`_ is a convenient tool for program deployment and management. We provide a way to deploy and manage with Fabric. If you want to know more about it, please read the following guidelines:
 
 .. toctree::
    :maxdepth: 1
 
-   fabric_cn.md
+   fabric_en.md
 
 We also support the deployment of PaddlePaddle on AWS. Learn more about:
 
 .. toctree::
    :maxdepth: 1
 
-   k8s_aws_cn.md
+   k8s_aws_en.md
 
-The examples can be found under `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ .
+The examples can be found under `cluster_train_v2 <https://github.com/PaddlePaddle/Paddle/tree/develop/paddle/scripts/cluster_train_v2>`_ .

paddle/fluid/platform/cuda_device_function.h

Lines changed: 10 additions & 0 deletions
@@ -35,6 +35,16 @@ __forceinline__ __device__ T __shfl_sync(unsigned, T val, int src_line,
 #define FULL_WARP_MASK 0xFFFFFFFF
 #define CREATE_SHFL_MASK(mask, predicate) \
   mask = __ballot_sync(FULL_WARP_MASK, (predicate))
+template <typename T>
+__forceinline__ __device__ T __shfl_down_sync(unsigned mask, T val, int delta) {
+  return __shfl_down_sync(mask, val, delta);
+}
+
+template <typename T>
+__forceinline__ __device__ T __shfl_sync(unsigned mask, T val, int src_line,
+                                         int width) {
+  return __shfl_sync(mask, val, src_line, width);
+}
 #endif
 
 template <typename T>

python/paddle/fluid/__init__.py

Lines changed: 6 additions & 5 deletions
@@ -21,14 +21,15 @@
 from executor import *
 
 import trainer
-from trainer import *
+from trainer import Trainer
+from trainer import BeginEpochEvent
+from trainer import EndEpochEvent
+from trainer import BeginStepEvent
+from trainer import EndStepEvent
 
 import inferencer
 from inferencer import Inferencer
 
-import params
-from params import Params
-
 import io
 import evaluator
 import initializer
@@ -57,7 +58,7 @@
 Tensor = LoDTensor
 
 __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ +\
-    trainer.__all__ + inferencer.__all__ + params.__all__ + [
+    trainer.__all__ + inferencer.__all__ + [
     'io',
     'initializer',
     'layers',
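
With the wildcard import gone, the trainer event classes become part of fluid's explicit public surface. A minimal sketch of an event handler written against these names; the epoch/step attribute names are assumptions for illustration, not taken from this commit:

import paddle.fluid as fluid

def event_handler(event):
    # Dispatch on the event classes exported above. The attribute
    # names below (epoch, step) are assumed for illustration.
    if isinstance(event, fluid.BeginEpochEvent):
        print("epoch %d starting" % event.epoch)
    elif isinstance(event, fluid.EndStepEvent):
        print("step %d finished" % event.step)

A handler like this would be passed to fluid.Trainer through its event_handler argument, as the tests further down do.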

python/paddle/fluid/inferencer.py

Lines changed: 6 additions & 2 deletions
@@ -12,18 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import core
+
 __all__ = ['Inferencer', ]
 
 
 class Inferencer(object):
-    def __init__(self, network_func, params, place=None):
+    def __init__(self, network_func, param_path=None, place=None):
         # 1. we need to generate a framework.Program by calling
         # network_func. Reference: fluid.program_guard in test_word2vec.py
 
         # 2. move the default_main_program to self.program.
 
         # 3. run the default_startup program.
-        self.params = params
+
+        # 4. load params from param_path into scope
+        self.scope = core.Scope()
         self.place = place
 
     def infer(self, inputs):
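
Callers now hand the Inferencer a network function plus the directory holding saved parameters, instead of a Params object. A minimal usage sketch, assuming a user-defined network function; the function body and path below are placeholders, not from this commit:

import paddle.fluid as fluid

def inference_program():
    # Placeholder network: a single fully connected layer.
    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    return fluid.layers.fc(input=x, size=1)

inferencer = fluid.Inferencer(
    inference_program,
    param_path="my_model.dir",  # hypothetical directory written by trainer.save_params
    place=fluid.CPUPlace())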

python/paddle/fluid/params.py

Lines changed: 0 additions & 39 deletions
This file was deleted.
Lines changed: 140 additions & 0 deletions (new file)
@@ -0,0 +1,140 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import paddle
+import paddle.fluid as fluid
+from functools import partial
+
+CLASS_DIM = 2
+EMB_DIM = 128
+HID_DIM = 512
+STACKED_NUM = 3
+
+
+def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
+    assert stacked_num % 2 == 1
+
+    emb = fluid.layers.embedding(
+        input=data, size=[input_dim, emb_dim], is_sparse=True)
+
+    fc1 = fluid.layers.fc(input=emb, size=hid_dim)
+    lstm1, cell1 = fluid.layers.dynamic_lstm(input=fc1, size=hid_dim)
+
+    inputs = [fc1, lstm1]
+
+    for i in range(2, stacked_num + 1):
+        fc = fluid.layers.fc(input=inputs, size=hid_dim)
+        lstm, cell = fluid.layers.dynamic_lstm(
+            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
+        inputs = [fc, lstm]
+
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
+
+    prediction = fluid.layers.fc(input=[fc_last, lstm_last],
+                                 size=class_dim,
+                                 act='softmax')
+    return prediction
+
+
+def inference_network(word_dict):
+    data = fluid.layers.data(
+        name="words", shape=[1], dtype="int64", lod_level=1)
+
+    dict_dim = len(word_dict)
+    net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM,
+                           STACKED_NUM)
+    return net
+
+
+def train_network(word_dict):
+    prediction = inference_network(word_dict)
+    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
+    cost = fluid.layers.cross_entropy(input=prediction, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    accuracy = fluid.layers.accuracy(input=prediction, label=label)
+    return avg_cost, accuracy
+
+
+def train(use_cuda, save_path):
+    BATCH_SIZE = 128
+    EPOCH_NUM = 5
+
+    word_dict = paddle.dataset.imdb.word_dict()
+
+    train_data = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.imdb.train(word_dict), buf_size=1000),
+        batch_size=BATCH_SIZE)
+
+    test_data = paddle.batch(
+        paddle.dataset.imdb.test(word_dict), batch_size=BATCH_SIZE)
+
+    def event_handler(event):
+        if isinstance(event, fluid.EndIteration):
+            if (event.batch_id % 10) == 0:
+                avg_cost, accuracy = trainer.test(reader=test_data)
+
+                print('BatchID {1:04}, Loss {2:2.2}, Acc {3:2.2}'.format(
+                    event.batch_id + 1, avg_cost, accuracy))
+
+                if accuracy > 0.01:  # Low threshold for speeding up CI
+                    trainer.params.save(save_path)
+                    return
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    trainer = fluid.Trainer(
+        partial(train_network, word_dict),
+        optimizer=fluid.optimizer.Adagrad(learning_rate=0.002),
+        place=place,
+        event_handler=event_handler)
+
+    trainer.train(train_data, EPOCH_NUM, event_handler=event_handler)
+
+
+def infer(use_cuda, save_path):
+    params = fluid.Params(save_path)
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    word_dict = paddle.dataset.imdb.word_dict()
+    inferencer = fluid.Inferencer(
+        partial(inference_network, word_dict), params, place=place)
+
+    def create_random_lodtensor(lod, place, low, high):
+        data = np.random.random_integers(low, high,
+                                         [lod[-1], 1]).astype("int64")
+        res = fluid.LoDTensor()
+        res.set(data, place)
+        res.set_lod([lod])
+        return res
+
+    lod = [0, 4, 10]
+    tensor_words = create_random_lodtensor(
+        lod, place, low=0, high=len(word_dict) - 1)
+    results = inferencer.infer({'words': tensor_words})
+    print("infer results: ", results)
+
+
+def main(use_cuda):
+    if use_cuda and not fluid.core.is_compiled_with_cuda():
+        return
+    save_path = "understand_sentiment_stacked_lstm.inference.model"
+    train(use_cuda, save_path)
+    infer(use_cuda, save_path)
+
+
+if __name__ == '__main__':
+    for use_cuda in (False, True):
+        main(use_cuda=use_cuda)

python/paddle/fluid/tests/book/word2vec/no_test_word2vec_new_api.py

Lines changed: 11 additions & 9 deletions
@@ -39,7 +39,7 @@ def create_random_lodtensor(lod, place, low, high):
 dict_size = len(word_dict)
 
 
-def inference_network(is_sparse):
+def inference_program(is_sparse):
     first_word = fluid.layers.data(name='firstw', shape=[1], dtype='int64')
     second_word = fluid.layers.data(name='secondw', shape=[1], dtype='int64')
     third_word = fluid.layers.data(name='thirdw', shape=[1], dtype='int64')
@@ -79,9 +79,9 @@ def inference_network(is_sparse):
     return predict_word
 
 
-def train_network(is_sparse):
+def train_program(is_sparse):
     next_word = fluid.layers.data(name='nextw', shape=[1], dtype='int64')
-    predict_word = inference_network(is_sparse)
+    predict_word = inference_program(is_sparse)
     cost = fluid.layers.cross_entropy(input=predict_word, label=next_word)
     avg_cost = fluid.layers.mean(cost)
     return avg_cost
@@ -100,23 +100,25 @@ def event_handler(event):
                 word_dict, N))
 
             if avg_cost < 5.0:
-                trainer.params.save(save_path)
+                trainer.save_params(save_path)
                 return
             if math.isnan(avg_cost):
                 sys.exit("got NaN loss, training failed.")
 
     trainer = fluid.Trainer(
-        partial(train_network, is_sparse),
+        partial(train_program, is_sparse),
         fluid.optimizer.SGD(learning_rate=0.001),
         place=place)
     trainer.train(
        reader=train_reader, num_epochs=100, event_handler=event_handler)
 
 
-def infer(use_cuda, save_path):
-    params = fluid.Params(save_path)
+def infer(use_cuda, is_sparse, save_path):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    inferencer = fluid.Inferencer(inference_network, params, place=place)
+    inferencer = fluid.Inferencer(
+        partial(inference_program, is_sparse),
+        param_path=save_path,
+        place=place)
 
     lod = [0, 1]
     first_word = create_random_lodtensor(lod, place, low=0, high=dict_size - 1)
@@ -138,7 +140,7 @@ def main(use_cuda, is_sparse):
 
     save_path = "word2vec.inference.model"
     train(use_cuda, is_sparse, save_path)
-    infer(use_cuda, save_path)
+    infer(use_cuda, is_sparse, save_path)
 
 
 if __name__ == '__main__':
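
Taken together, these changes replace the Params round trip with a path-based one: the trainer persists parameters with save_params, and the Inferencer reloads them through param_path. A condensed sketch of the new convention; train_program and inference_program are the functions defined in this file, and is_sparse is fixed here only for illustration:

from functools import partial
import paddle.fluid as fluid

is_sparse = True
save_path = "word2vec.inference.model"
place = fluid.CPUPlace()

# Training side: the Trainer owns the parameters and writes them by path.
trainer = fluid.Trainer(
    partial(train_program, is_sparse),
    fluid.optimizer.SGD(learning_rate=0.001),
    place=place)
# ... once training converges, the event handler calls:
#     trainer.save_params(save_path)

# Inference side: reload the same directory by path.
inferencer = fluid.Inferencer(
    partial(inference_program, is_sparse),
    param_path=save_path,
    place=place)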
