Skip to content

Commit 5bca34e

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 32b5557 + 341486d commit 5bca34e

File tree

8 files changed

+102
-78
lines changed

8 files changed

+102
-78
lines changed

doc/algorithm/rnn/rnn.rst

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,15 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
142142
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
143143

144144
.. code-block:: python
145-
145+
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
146+
StaticInput(input=encoded_proj,is_seq=True)]
146147
trg_embedding = embedding_layer(
147148
input=data_layer(name='target_language_word',
148149
size=target_dict_dim),
149150
size=word_vector_dim,
150151
param_attr=ParamAttr(name='_target_language_embedding'))
152+
group_inputs.append(trg_embedding)
153+
151154
# For decoder equipped with attention mechanism, in training,
152155
# target embedding (the groundtruth) is the data input,
153156
# while encoded source sequence is accessed to as an unbounded memory.
@@ -156,13 +159,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
156159
# All sequence inputs should have the same length.
157160
decoder = recurrent_group(name=decoder_group_name,
158161
step=gru_decoder_with_attention,
159-
input=[
160-
StaticInput(input=encoded_vector,
161-
is_seq=True),
162-
StaticInput(input=encoded_proj,
163-
is_seq=True),
164-
trg_embedding
165-
])
162+
input=group_inputs)
166163
167164
168165
The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function:
@@ -217,10 +214,8 @@ The code is listed below:
217214

218215
.. code-block:: python
219216
220-
gen_inputs = [StaticInput(input=encoded_vector,
221-
is_seq=True),
222-
StaticInput(input=encoded_proj,
223-
is_seq=True), ]
217+
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
218+
StaticInput(input=encoded_proj,is_seq=True)]
224219
# In generation, decoder predicts a next target word based on
225220
# the encoded source sequence and the last generated target word.
226221
# The encoded source sequence (encoder's output) must be specified by
@@ -231,10 +226,10 @@ The code is listed below:
231226
size=target_dict_dim,
232227
embedding_name='_target_language_embedding',
233228
embedding_size=word_vector_dim)
234-
gen_inputs.append(trg_embedding)
229+
group_inputs.append(trg_embedding)
235230
beam_gen = beam_search(name=decoder_group_name,
236231
step=gru_decoder_with_attention,
237-
input=gen_inputs,
232+
input=group_inputs,
238233
id_input=data_layer(name="sent_id",
239234
size=1),
240235
dict_file=trg_dict_path,

doc/ui/api/trainer_config_helpers/layers.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,12 @@ dotmul_projection
169169
:members: dotmul_projection
170170
:noindex:
171171

172+
dotmul_operator
173+
---------------
174+
.. automodule:: paddle.trainer_config_helpers.layers
175+
:members: dotmul_operator
176+
:noindex:
177+
172178
full_matrix_projection
173179
----------------------
174180
.. automodule:: paddle.trainer_config_helpers.layers

paddle/gserver/layers/CudnnConvLayer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
8585
biasOffset_ = numFilters_ / groups_[0];
8686
}
8787

88+
batchNum_ = 0;
8889
isSelectAlgo_ = false;
8990
return true;
9091
}
@@ -132,6 +133,11 @@ void CudnnConvLayer::reshape(int batchSize) {
132133
getOutput().setFrameHeight(outputH_);
133134
getOutput().setFrameWidth(outputW_);
134135

136+
// if the batchSize remains the same, set isSelectAlgo_ true.
137+
// Otherwise, set isSelectAlgo_ false and select algo again.
138+
isSelectAlgo_ = (batchSize == batchNum_);
139+
batchNum_ = batchSize;
140+
135141
size_t maxWorkSpace = 0;
136142
for (size_t i = 0; i < inputLayers_.size(); i++) {
137143
CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),
@@ -160,6 +166,10 @@ void CudnnConvLayer::reshape(int batchSize) {
160166

161167
maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);
162168
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);
169+
170+
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]
171+
<< " / " << bwdDataAlgo_[i]
172+
<< " / " << bwdFilterAlgo_[i];
163173
}
164174
}
165175

paddle/gserver/layers/CudnnConvLayer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ class CudnnConvLayer : public ConvBaseLayer {
8787
/// Whether or not to select the conv algorithm.
8888
bool isSelectAlgo_;
8989

90+
/// batchNum is used to record batch size. If the batch size is changed,
91+
/// the selection algorithm will be called.
92+
int batchNum_;
93+
9094
public:
9195
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
9296

paddle/gserver/layers/MultinomialSampler.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace paddle {
1919

2020
MultinomialSampler::MultinomialSampler(const real* prob, int size)
2121
: rand_(0.0, size) {
22-
intervals_.reserve(size + 1);
22+
intervals_.resize(size + 1);
2323
double sum = 0;
2424
for (int i = 0; i < size; ++i) {
2525
sum += prob[i];
@@ -50,12 +50,13 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
5050
int bigPos = nextBigPos(0);
5151

5252
auto fillIntervals = [&]() {
53-
while (bigPos < size && smallPos < size) {
53+
while (bigPos < size) {
5454
while (intervals_[bigPos].thresh > 1 && smallPos < size) {
5555
intervals_[smallPos].otherId = bigPos;
5656
intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh;
5757
smallPos = nextSmallPos(smallPos + 1);
5858
}
59+
if (smallPos >= size) break;
5960
bigPos = nextBigPos(bigPos + 1);
6061
// If intervals_[bigPos].thresh < 1, it becomes a small interval
6162
}

paddle/gserver/tests/test_MultinomialSampler.cpp

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -41,39 +41,42 @@ class MultinomialSamplerTester : public MultinomialSampler {
4141
TEST(MultinomialSampler, gen) {
4242
int numGrids = 1024 * 1024;
4343
int size = 1024 * 4;
44-
4544
default_random_engine reng;
46-
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
47-
vector<real> prob;
48-
int sum = 0;
49-
for (int i = 0; i < size; ++i) {
50-
prob.push_back(rand(reng));
51-
sum += prob.back();
52-
}
53-
CHECK_LE(sum, numGrids);
54-
prob.back() += numGrids - sum;
5545

56-
vector<int> counts(size);
57-
MultinomialSamplerTester sampler(&prob[0], size);
58-
counts.assign(size, 0);
59-
{
60-
double s = (double)size / (double)numGrids;
61-
REGISTER_TIMER("MultinomialSampler");
62-
for (double i = 0; i < numGrids; ++i) {
63-
int ret = sampler.testGen([i, s]() { return s * i; });
64-
if (ret < 0 || ret >= size) {
65-
EXPECT_GE(ret, 0);
66-
EXPECT_LT(ret, size);
67-
break;
46+
for (size_t iter=0; iter < 256; ++iter) {
47+
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
48+
vector<real> prob;
49+
int sum = 0;
50+
for (int i = 0; i < size; ++i) {
51+
prob.push_back(rand(reng));
52+
sum += prob.back();
53+
}
54+
55+
CHECK_LE(sum, numGrids);
56+
prob.back() += numGrids - sum;
57+
58+
vector<int> counts(size);
59+
MultinomialSamplerTester sampler(&prob[0], size);
60+
counts.assign(size, 0);
61+
{
62+
double s = (double)size / (double)numGrids;
63+
REGISTER_TIMER("MultinomialSampler");
64+
for (double i = 0; i < numGrids; ++i) {
65+
int ret = sampler.testGen([i, s]() { return s * i; });
66+
if (ret < 0 || ret >= size) {
67+
EXPECT_GE(ret, 0);
68+
EXPECT_LT(ret, size);
69+
break;
70+
}
71+
++counts[ret];
6872
}
69-
++counts[ret];
7073
}
71-
}
72-
for (int i = 0; i < size; ++i) {
73-
if (prob[i] != counts[i]) {
74-
EXPECT_EQ(prob[i], counts[i]);
75-
LOG(INFO) << "i=" << i;
76-
break;
74+
for (int i = 0; i < size; ++i) {
75+
if (prob[i] != counts[i]) {
76+
EXPECT_EQ(prob[i], counts[i]);
77+
LOG(INFO) << iter;
78+
break;
79+
}
7780
}
7881
}
7982
}
@@ -135,6 +138,7 @@ void benchmarkRandom() {
135138
LOG(INFO) << "sum1=" << sum1;
136139
}
137140

141+
138142
int main(int argc, char** argv) {
139143
initMain(argc, argv);
140144
testing::InitGoogleTest(&argc, argv);

python/paddle/trainer/config_parser.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,6 @@ def __init__(
636636
input_layer_names,
637637
):
638638
self.add_keys(locals())
639-
640639
self.operator_conf = OperatorConfig()
641640
self.operator_conf.type = self.type
642641

@@ -686,12 +685,15 @@ def __init__(
686685
if num_filters is not None:
687686
self.operator_conf.num_filters = num_filters
688687

689-
parse_conv(conv_conf, input_layer_names[0], self.operator_conf.conv_conf, True)
688+
parse_conv(conv_conf,
689+
MakeLayerNameInSubmodel(input_layer_names[0]),
690+
self.operator_conf.conv_conf)
690691
self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters
691692

692693
config_assert(len(input_layer_names) == 2, "Conv is binary operator")
693694

694-
695+
def calc_output_size(self, input_sizes):
696+
return self.operator_conf.output_size
695697

696698

697699
# please refer to the comments in proto/ModelConfig.proto
@@ -2462,11 +2464,11 @@ def __init__(
24622464
if size != 0:
24632465
self.set_layer_size(size)
24642466
else:
2465-
size = operator.calc_output_size(operator_conf.input_sizes)
2466-
if size != 0:
2467-
config_assert(size == self.config.size,
2467+
sz = operator.calc_output_size(operator_conf.input_sizes)
2468+
if sz != 0:
2469+
config_assert(sz == self.config.size,
24682470
"different inputs have different size: %s vs. %s" %
2469-
(size, self.config.size))
2471+
(sz, self.config.size))
24702472
for input_index in xrange(len(self.inputs)):
24712473
input_layer = self.get_input_layer(input_index)
24722474
input = self.inputs[input_index]

0 commit comments

Comments
 (0)