Commit fd8d83e

Author: chengduo

Fix the nested dyn_rnn (#13417)

* add unit test for nested drnn
* add nested dyn_rnn
* refine while_op
* fix bug

1 parent cf12823 commit fd8d83e
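
For context, a DynamicRNN is lowered to a while op, so nesting one DynamicRNN inside another yields nested while ops whose backward pass goes through WhileGradOp, which is what this commit repairs. The snippet below is a condensed, illustrative sketch of the pattern the new unit tests in this commit exercise (same Fluid calls and shapes as the test, shortened for readability); it assumes a Fluid-era PaddlePaddle install and is not additional code from the change itself.

import paddle.fluid as fluid

# lod_level=2 input: a batch of sequences of sub-sequences of word ids
sentence = fluid.layers.data(
    name='word', shape=[1], dtype='int64', lod_level=2)

rnn = fluid.layers.DynamicRNN()            # outer RNN -> one while_op
with rnn.block():
    in_ = rnn.step_input(sentence)         # each step is a lod_level=1 sub-sequence
    emb = fluid.layers.embedding(input=in_, size=[30, 32], dtype='float32')
    proj = fluid.layers.fc(input=emb, size=100, act='tanh')

    rnn1 = fluid.layers.DynamicRNN()       # inner RNN -> a nested while_op
    with rnn1.block():
        in_1 = rnn1.step_input(proj)
        rnn1.output(fluid.layers.fc(input=[in_1], size=100, act='tanh'))

    rnn.output(fluid.layers.sequence_last_step(input=rnn1()))

last = rnn()  # minimizing a loss built on `last` runs the nested WhileGradOp path

The full tests, which train this structure with SGD for 100 mini-batches, are in test_dyn_rnn.py below.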

File tree: 2 files changed (+199 / -27 lines)


paddle/fluid/operators/while_op.cc

Lines changed: 63 additions & 27 deletions
@@ -1,16 +1,16 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
 
 #include <vector>
 #include "paddle/fluid/framework/executor.h"
@@ -138,6 +138,10 @@ class WhileGradOp : public framework::OperatorBase {
         auto inside_og_name = inside_og_names[i];
         VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
                 << inside_og_name;
+        if (scope.FindVar(outside_og_name) == nullptr) {
+          continue;
+        }
+
         auto &og_outside =
             detail::Ref(scope.FindVar(outside_og_name),
                         "Cannot find Outside Gradient %s", outside_og_name);
@@ -167,20 +171,46 @@ class WhileGradOp : public framework::OperatorBase {
              PADDLE_ENFORCE_EQ(inside_array[j].numel(), 0);
            }
          }
+        } else {
+          PADDLE_THROW("Currently only support LoDTensor and LoDTensorArray.");
        }
      }
      executor.RunPreparedContext(ctx.get(), *cur_scope_iter, false, true,
                                  true);
 
-      auto &pg_names = Outputs(kXGRAD);
+      // The Outputs(kXGRAD) contains the names of the gradient of parameters
+      // and inputs.
+      auto &pg_ig_names = Outputs(kXGRAD);
       auto &p_names = Inputs(kX);
-      PADDLE_ENFORCE_EQ(pg_names.size(), p_names.size());
-      for (size_t param_id = 0; param_id < pg_names.size(); ++param_id) {
-        if (pg_names[param_id] == framework::kEmptyVarName) {
+      PADDLE_ENFORCE_EQ(pg_ig_names.size(), p_names.size());
+      for (size_t param_id = 0; param_id < pg_ig_names.size(); ++param_id) {
+        if (pg_ig_names[param_id] == framework::kEmptyVarName) {
          continue;  // parameter doesn't have gradient
        }
        auto inside_grad_name = framework::GradVarName(p_names[param_id]);
 
+        // for some grad_op, their input doesn't have gradient,
+        // for example lookup_table_grad_op, the input(Idx) doesn't have
+        // gradient.
+        auto pg_ig_var = cur_scope.FindVar(inside_grad_name);
+        PADDLE_ENFORCE(pg_ig_var != nullptr);
+        if (pg_ig_var->IsType<framework::LoDTensorArray>()) {
+          auto pg_ig_lod_t_arr =
+              pg_ig_var->GetMutable<framework::LoDTensorArray>();
+          bool empty = true;
+          for (auto &each : *pg_ig_lod_t_arr) {
+            if (each.numel() != 0) {
+              empty = false;
+              break;
+            }
+          }
+          if (empty) {
+            LOG(WARNING) << pg_ig_names[param_id]
+                         << " is not found in cur_scope.";
+            continue;
+          }
+        }
+
        // // TODO(tonyyang-svail): Not sure we need the following
        // // If does not compute gradient of that variable inside rnn,
        // just
@@ -194,14 +224,19 @@ class WhileGradOp : public framework::OperatorBase {
        if (cur_scope_iter == step_scopes->rbegin()) {
          auto *var = (*cur_scope_iter)->FindVar(inside_grad_name);
          PADDLE_ENFORCE_NOT_NULL(var, "Can not find var %s", inside_grad_name);
+          PADDLE_ENFORCE(var->IsType<framework::LoDTensorArray>() ||
+                             var->IsType<LoDTensor>(),
+                         "Currently the type of var only can be LoDTensorArray "
+                         "or LoDTensor.");
+
          if (var->IsType<LoDTensor>()) {
            auto &inside_tensor = var->Get<framework::LoDTensor>();
            framework::AttributeMap attrs;
            attrs["dtype"] = framework::ToDataType(inside_tensor.type());
            attrs["shape"] = framework::vectorize2int(inside_tensor.dims());
            attrs["value"] = 0.0f;
 
-            auto var_name = pg_names[param_id];
+            auto var_name = pg_ig_names[param_id];
            auto zero_op = framework::OpRegistry::CreateOp(
                "fill_constant", framework::VariableNameMap{},
                {{"Out", {var_name}}}, attrs);
@@ -213,8 +248,8 @@ class WhileGradOp : public framework::OperatorBase {
        }
        auto new_inside_name = cur_scope.Rename(inside_grad_name);
        auto sum_op = framework::OpRegistry::CreateOp(
-            "sum", {{"X", {pg_names[param_id], new_inside_name}}},
-            {{"Out", {pg_names[param_id]}}},
+            "sum", {{"X", {pg_ig_names[param_id], new_inside_name}}},
+            {{"Out", {pg_ig_names[param_id]}}},
            framework::AttributeMap{{"use_mkldnn", {false}}});
        sum_op->Run(cur_scope, dev_place);
        cur_scope.Rename(new_inside_name, inside_grad_name);
@@ -281,6 +316,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
            parent_block->FindVarRecursive(input_name) != nullptr)) {
          continue;
        }
+
        output_grads.insert(input_name);
      }
      for (auto &output_name : op->OutputArgumentNames()) {
@@ -309,13 +345,13 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
   void operator()(const framework::OpDesc &op_desc,
                   framework::BlockDesc *block) const override {
     auto p_names = op_desc.Input(kX);
-    auto pg_names = op_desc.Output(framework::GradVarName(kX));
+    auto pg_ig_names = op_desc.Output(framework::GradVarName(kX));
 
     for (size_t i = 0; i < p_names.size(); ++i) {
       auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
-      auto *g_var = block->FindVarRecursive(pg_names[i]);
+      auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
       if (g_var != nullptr) {  // Gradient could be @EMPTY@
-        VLOG(5) << "Setting " << pg_names[i] << " following " << p_names[i]
+        VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
                 << " type: " << p_var.GetType();
         g_var->SetType(p_var.GetType());
         g_var->SetDataType(p_var.GetDataType());
@@ -333,21 +369,21 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
     ctx->HasInputs(framework::GradVarName(kOutputs));
 
     auto p_names = ctx->Inputs(kX);
-    auto pg_names = ctx->Outputs(kXGRAD);
+    auto pg_ig_names = ctx->Outputs(kXGRAD);
     auto var_types = ctx->GetInputsVarType(kX);
     std::vector<std::string> names_to_set;
     std::vector<framework::DDim> dims_to_set;
     for (size_t i = 0; i < p_names.size(); ++i) {
-      if (pg_names[i] == framework::kEmptyVarName) {
+      if (pg_ig_names[i] == framework::kEmptyVarName) {
         continue;
       }
       auto dims = ctx->GetInputsElementDim(kX, i);
       if (var_types[i] == framework::proto::VarType::LOD_TENSOR) {
-        names_to_set.push_back(pg_names[i]);
+        names_to_set.push_back(pg_ig_names[i]);
         dims_to_set.push_back(dims);
       } else if (var_types[i] == framework::proto::VarType::LOD_TENSOR_ARRAY) {
         // not sure how to set the dim of LOD_TENSOR_ARRAY
-        names_to_set.push_back(pg_names[i]);
+        names_to_set.push_back(pg_ig_names[i]);
         dims_to_set.push_back(dims);
       }
     }
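
In short, WhileGradOp now tolerates gradients that are legitimately absent: when linking outer gradient variables into the step scopes it skips names with no variable in the scope (which previously tripped the "Cannot find Outside Gradient" check), and when accumulating parameter/input gradients it skips an inside gradient held as an all-empty LoDTensorArray (e.g. the Idx input of lookup_table has no gradient). Below is a rough, purely illustrative Python analogue of that accumulation loop; the dict-based scopes, the variable names, and the helper function are hypothetical and not PaddlePaddle APIs.

import numpy as np

EMPTY = "@EMPTY@"  # stand-in for framework::kEmptyVarName


def accumulate_grads(step_scopes, grad_names):
    """Sum per-step gradients, skipping the cases the while_op change tolerates."""
    accumulated = {}
    for scope in reversed(step_scopes):      # WhileGradOp walks the steps backwards
        for name in grad_names:
            if name == EMPTY:                # parameter doesn't have gradient
                continue
            grad = scope.get(name)
            if grad is None:                 # no gradient produced in this step
                continue
            if isinstance(grad, list):       # LoDTensorArray-like gradient
                if all(t.size == 0 for t in grad):
                    continue                 # every slot empty: nothing to add
                grad = np.concatenate([t for t in grad if t.size > 0])
            accumulated[name] = accumulated.get(name, 0.0) + grad
    return accumulated


# Tiny check: one step lacks "emb@GRAD" and "idx@GRAD" is an all-empty
# tensor array, yet accumulation still succeeds for the remaining gradient.
scopes = [
    {"emb@GRAD": np.ones(3), "idx@GRAD": [np.empty(0)]},
    {"idx@GRAD": [np.empty(0), np.empty(0)]},
]
print(accumulate_grads(scopes, ["emb@GRAD", "idx@GRAD", EMPTY]))
# -> {'emb@GRAD': array([1., 1., 1.])}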

python/paddle/fluid/tests/unittests/test_dyn_rnn.py

Lines changed: 136 additions & 0 deletions
@@ -144,6 +144,142 @@ def test_train_dyn_rnn(self):
         # loss should be small after 100 mini-batch
         self.assertLess(val[0], loss_0[0])
 
+    # this unit test is just used to the two layer nested dyn_rnn.
+    def test_train_nested_dyn_rnn(self):
+        word_dict = [i for i in range(30)]
+
+        def fake_reader():
+            seq_len, label = [[2, 2]], [0, 1]
+            data = []
+            for ele in seq_len:
+                for j in ele:
+                    data.append([numpy.random.randint(30) \
+                                 for _ in range(j)])
+
+            while True:
+                yield data, label
+
+        train_data = paddle.batch(fake_reader, batch_size=2)
+
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            sentence = fluid.layers.data(
+                name='word', shape=[1], dtype='int64', lod_level=2)
+            label = fluid.layers.data(
+                name='label', shape=[1], dtype='float32', lod_level=1)
+
+            rnn = fluid.layers.DynamicRNN()
+            with rnn.block():
+                in_ = rnn.step_input(sentence)
+                sent_emb = fluid.layers.embedding(
+                    input=in_, size=[len(word_dict), 32], dtype='float32')
+                out_ = fluid.layers.fc(input=sent_emb, size=100, act='tanh')
+
+                rnn1 = fluid.layers.DynamicRNN()
+                with rnn1.block():
+                    in_1 = rnn1.step_input(out_)
+                    out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
+                    rnn1.output(out_1)
+
+                last = fluid.layers.sequence_last_step(input=rnn1())
+                rnn.output(last)
+
+            last = rnn()
+            logits = fluid.layers.fc(input=last, size=1, act=None)
+            loss = fluid.layers.sigmoid_cross_entropy_with_logits(
+                x=logits, label=label)
+            loss = fluid.layers.mean(loss)
+            sgd = fluid.optimizer.SGD(1e-3)
+            #sgd = fluid.optimizer.Adam(1e-3)
+            sgd.minimize(loss=loss)
+
+        cpu = fluid.CPUPlace()
+        exe = fluid.Executor(cpu)
+        exe.run(startup_program)
+        feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
+        data = next(train_data())
+        val = exe.run(main_program, feed=feeder.feed(data),
+                      fetch_list=[loss])[0]
+
+        for _ in range(100):
+            val = exe.run(main_program,
+                          feed=feeder.feed(data),
+                          fetch_list=[loss])[0]
+        print(val)
+
+    # this unit test is just used to the two layer nested dyn_rnn.
+    def test_train_nested_dyn_rnn2(self):
+        word_dict = [i for i in range(30)]
+
+        def fake_reader():
+            seq_len, label = [[2, 2]], [0, 1]
+            data = []
+            for ele in seq_len:
+                for j in ele:
+                    data.append([numpy.random.randint(30) \
+                                 for _ in range(j)])
+
+            while True:
+                yield data, label
+
+        train_data = paddle.batch(fake_reader, batch_size=2)
+        hidden_size = 32
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            sentence = fluid.layers.data(
+                name='word', shape=[1], dtype='int64', lod_level=2)
+            label = fluid.layers.data(
+                name='label', shape=[1], dtype='float32', lod_level=1)
+
+            rnn = fluid.layers.DynamicRNN()
+            with rnn.block():
+                in_ = rnn.step_input(sentence)
+                sent_emb = fluid.layers.embedding(
+                    input=in_,
+                    size=[len(word_dict), hidden_size],
+                    dtype='float32')
+                input_forward_proj = fluid.layers.fc(input=sent_emb,
+                                                     size=hidden_size * 4,
+                                                     act=None,
+                                                     bias_attr=False)
+                forward, _ = fluid.layers.dynamic_lstm(
+                    input=input_forward_proj,
+                    size=hidden_size * 4,
+                    use_peepholes=False)
+
+                rnn1 = fluid.layers.DynamicRNN()
+                with rnn1.block():
+                    in_1 = rnn1.step_input(forward)
+                    out_1 = fluid.layers.fc(input=[in_1], size=100, act='tanh')
+                    rnn1.output(out_1)
+
+                last = fluid.layers.sequence_last_step(input=rnn1())
+                rnn.output(last)
+
+            last = rnn()
+            logits = fluid.layers.fc(input=last, size=1, act=None)
+            loss = fluid.layers.sigmoid_cross_entropy_with_logits(
+                x=logits, label=label)
+            loss = fluid.layers.mean(loss)
+            sgd = fluid.optimizer.SGD(1e-3)
+            #sgd = fluid.optimizer.Adam(1e-3)
+            sgd.minimize(loss=loss)
+
+        cpu = fluid.CPUPlace()
+        exe = fluid.Executor(cpu)
+        exe.run(startup_program)
+        feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
+        data = next(train_data())
+        val = exe.run(main_program, feed=feeder.feed(data),
+                      fetch_list=[loss])[0]
+
+        for _ in range(100):
+            val = exe.run(main_program,
+                          feed=feeder.feed(data),
+                          fetch_list=[loss])[0]
+
 
 if __name__ == '__main__':
     unittest.main()
