
Commit 229c2e7

Feature/while op sentiment analysis (#6282)

* Add DataFeeder, a v2-API-like data feeder for book demos; we can feed data directly from a reader.
* Fix CI
* Add a unit test for while/rnn op forward
* Add a unit test for raw while op backward
* Fix CI
* Complete Dynamic RNN

1 parent 94a36b8 commit 229c2e7

File tree

4 files changed: +269 −8 lines

paddle/framework/backward.cc
python/paddle/v2/fluid/layer_helper.py
python/paddle/v2/fluid/layers.py
python/paddle/v2/fluid/tests/test_dyn_rnn.py

paddle/framework/backward.cc

Lines changed: 2 additions & 2 deletions

@@ -33,8 +33,8 @@ static std::unordered_set<std::string>* g_ctrl_flow_ops_ = nullptr;
 // We should design a better way to backward CtrlFlowOps.
 static std::unordered_set<std::string>& CtrlFlowOps() {
   if (g_ctrl_flow_ops_ == nullptr) {
-    g_ctrl_flow_ops_ =
-        new std::unordered_set<std::string>{"increment", "lod_rank_table"};
+    g_ctrl_flow_ops_ = new std::unordered_set<std::string>{
+        "increment", "lod_rank_table", "less_than"};
   }
   return *g_ctrl_flow_ops_;
 }

python/paddle/v2/fluid/layer_helper.py

Lines changed: 7 additions & 0 deletions

@@ -151,6 +151,13 @@ def set_variable_initializer(self, var, initializer):
             persistable=True,
             initializer=initializer)

+    @property
+    def to_kwargs(self):
+        return {
+            'main_program': self.main_program,
+            'startup_program': self.startup_program
+        }
+
     def append_bias_op(self, input_var, dim_start=1, dim_end=None):
         """
         Append bias operator and return its output. If the user does not set
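
The new `to_kwargs` property exists so that layer code can forward the helper's `main_program`/`startup_program` pair in a single `**` expansion instead of repeating both keyword arguments at every call site. Below is a minimal, self-contained sketch of that pattern; the `Helper` and `some_layer` names are illustrative stand-ins, not Paddle APIs:

```python
# Illustrative stand-ins; only the **-forwarding pattern mirrors the real code.
class Helper(object):
    def __init__(self, main_program=None, startup_program=None):
        self.main_program = main_program
        self.startup_program = startup_program

    @property
    def to_kwargs(self):
        # Bundle the two program kwargs so callers can splat them with **.
        return {
            'main_program': self.main_program,
            'startup_program': self.startup_program
        }


def some_layer(x, main_program=None, startup_program=None):
    return x, main_program, startup_program


helper = Helper(main_program='main', startup_program='startup')
# Equivalent to some_layer(1, main_program='main', startup_program='startup').
assert some_layer(1, **helper.to_kwargs) == (1, 'main', 'startup')
```

This is also why the layer functions in layers.py below gain a `startup_program=None` parameter: calls such as `array_read(..., **self.helper.to_kwargs)` always pass both keys, so every callee must accept both.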

python/paddle/v2/fluid/layers.py

Lines changed: 216 additions & 5 deletions

@@ -6,6 +6,7 @@
 import re
 import cStringIO
 from param_attr import ParamAttr
+import contextlib

 __all__ = [
     'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat',
@@ -1395,7 +1396,7 @@ def lod_tensor_to_array(x, table, main_program=None):
     return array


-def array_to_lod_tensor(x, table, main_program=None):
+def array_to_lod_tensor(x, table, main_program=None, startup_program=None):
     """
     This function creates an operator to convert an array to a
     LOD_Tensor.
@@ -1476,7 +1477,11 @@ def zeros(shape, dtype, main_program=None):
     return fill_constant(value=0.0, **locals())


-def increment(x, value=1.0, in_place=True, main_program=None):
+def increment(x,
+              value=1.0,
+              in_place=True,
+              main_program=None,
+              startup_program=None):
     """
     This function creates an operator to increment each value in the input
     `x` by an amount: `value` as mentioned in the input parameter. This
@@ -1495,7 +1500,7 @@ def increment(x, value=1.0, in_place=True, main_program=None):
     return out


-def array_write(x, i, array=None, main_program=None):
+def array_write(x, i, array=None, main_program=None, startup_program=None):
     """
     This function creates an operator to write the data out as a
     LOD_TENSOR_ARRAY.
@@ -1534,7 +1539,7 @@ def less_than(x, y, cond=None, main_program=None, **ignored):
     return cond


-def array_read(array, i, main_program=None):
+def array_read(array, i, main_program=None, startup_program=None):
     """
     This function creates an operator to read the data in as a
     LOD_TENSOR_ARRAY.
@@ -1553,7 +1558,7 @@ def array_read(array, i, main_program=None):
     return out


-def shrink_memory(x, i, table, main_program=None):
+def shrink_memory(x, i, table, main_program=None, startup_program=None):
     """
     This function creates an operator to shrink_rnn_memory using the RankTable
     as mentioned in the input parameter.
@@ -1890,3 +1895,209 @@ def __call__(self):
                 main_program=self.helper.main_program,
                 startup_program=self.helper.startup_program))
         return rlist
+
+
+class DynamicRNN(object):
+    BEFORE_RNN = 0
+    IN_RNN = 1
+    AFTER_RNN = 2
+
+    def __init__(self, name=None, main_program=None, startup_program=None):
+        self.helper = LayerHelper(
+            'dynamic_rnn',
+            name=name,
+            main_program=main_program,
+            startup_program=startup_program)
+        self.status = DynamicRNN.BEFORE_RNN
+        self.lod_rank_table = None
+        self.max_seq_len = None
+        self.step_idx = None
+        self.zero_idx = fill_constant(shape=[1], value=0, dtype='int64')
+        self.mem_dict = dict()
+        self.output_array = []
+        self.outputs = []
+        self.cond = self.helper.create_tmp_variable(dtype='bool')
+        self.cond.stop_gradient = False
+        self.while_op = While(self.cond)
+        self.input_array = []
+        self.mem_link = []
+
+    def step_input(self, x):
+        self._assert_in_rnn_block_("step_input")
+        if not isinstance(x, Variable):
+            raise TypeError(
+                "step_input() can only take a Variable as its input")
+        parent_block = self._parent_block_()
+        if self.lod_rank_table is None:
+            self.lod_rank_table = parent_block.create_var(
+                name=unique_name('lod_rank_table'),
+                type=core.VarDesc.VarType.LOD_RANK_TABLE)
+            self.lod_rank_table.stop_gradient = True
+            parent_block.append_op(
+                type='lod_rank_table',
+                inputs={"X": x},
+                outputs={"Out": self.lod_rank_table})
+            self.max_seq_len = parent_block.create_var(
+                name=unique_name('dynamic_rnn_max_seq_len'), dtype='int64')
+            self.max_seq_len.stop_gradient = False
+            parent_block.append_op(
+                type='max_sequence_len',
+                inputs={'RankTable': self.lod_rank_table},
+                outputs={"Out": self.max_seq_len})
+            self.cond.stop_gradient = True
+            parent_block.append_op(
+                type='less_than',
+                inputs={'X': self.step_idx,
+                        'Y': self.max_seq_len},
+                outputs={'Out': self.cond})
+
+        input_array = parent_block.create_var(
+            name=unique_name('dynamic_rnn_input_array'),
+            type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
+            dtype=x.dtype)
+        self.input_array.append((input_array, x.dtype))
+        parent_block.append_op(
+            type='lod_tensor_to_array',
+            inputs={'X': x,
+                    'RankTable': self.lod_rank_table},
+            outputs={'Out': input_array})
+        return array_read(
+            array=input_array, i=self.step_idx, **self.helper.to_kwargs)
+
+    @contextlib.contextmanager
+    def block(self):
+        if self.status != DynamicRNN.BEFORE_RNN:
+            raise ValueError("rnn.block() can only be invoked once")
+        self.step_idx = fill_constant(shape=[1], dtype='int64', value=0)
+        self.step_idx.stop_gradient = False
+        self.status = DynamicRNN.IN_RNN
+        with self.while_op.block():
+            yield
+            increment(
+                x=self.step_idx,
+                value=1.0,
+                in_place=True,
+                **self.helper.to_kwargs)

+            for new_mem, mem_array in self.mem_link:
+                array_write(
+                    x=new_mem,
+                    i=self.step_idx,
+                    array=mem_array,
+                    **self.helper.to_kwargs)
+
+            less_than(
+                x=self.step_idx,
+                y=self.max_seq_len,
+                cond=self.cond,
+                **self.helper.to_kwargs)
+
+        self.status = DynamicRNN.AFTER_RNN
+        for each_array in self.output_array:
+            self.outputs.append(
+                array_to_lod_tensor(
+                    x=each_array,
+                    table=self.lod_rank_table,
+                    **self.helper.to_kwargs))
+
+    def __call__(self, *args, **kwargs):
+        if self.status != DynamicRNN.AFTER_RNN:
+            raise ValueError(
+                "Dynamic RNN outputs can only be retrieved after rnn block")
+        if len(self.outputs) == 1:
+            return self.outputs[0]
+        else:
+            return self.outputs
+
+    def memory(self, init=None, shape=None, value=0.0, dtype='float32'):
+        self._assert_in_rnn_block_('memory')
+        if init is not None:
+            if not isinstance(init, Variable):
+                raise TypeError(
+                    "The input arg `init` of memory() must be a Variable")
+            parent_block = self._parent_block_()
+            mem_array = parent_block.create_var(
+                name=unique_name('dynamic_rnn_mem_array'),
+                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
+                dtype=init.dtype)
+            parent_block.append_op(
+                type='write_to_array',
+                inputs={'X': init,
+                        'I': self.zero_idx},
+                outputs={'Out': mem_array})
+            retv = array_read(
+                array=mem_array, i=self.step_idx, **self.helper.to_kwargs)
+            retv = shrink_memory(
+                x=retv,
+                i=self.step_idx,
+                table=self.lod_rank_table,
+                **self.helper.to_kwargs)
+            self.mem_dict[retv.name] = mem_array
+            return retv
+        else:
+            if len(self.input_array) == 0:
+                raise ValueError(
+                    "step_input should be invoked before memory(shape=..., value=...)"
+                )
+            parent_block = self._parent_block_()
+            init = parent_block.create_var(
+                name=unique_name('mem_init'), dtype=dtype)
+            arr, dtype = self.input_array[0]
+            in0 = parent_block.create_var(name=unique_name('in0'), dtype=dtype)
+            parent_block.append_op(
+                type='read_from_array',
+                inputs={'X': [arr],
+                        'I': [self.zero_idx]},
+                outputs={'Out': [in0]})
+            parent_block.append_op(
+                type='fill_constant_batch_size_like',
+                inputs={'Input': [in0]},
+                outputs={'Out': [init]},
+                attrs={
+                    'shape': [-1] + shape,
+                    'value': float(value),
+                    'dtype': init.dtype
+                })
+            return self.memory(init=init)
+
+    def update_memory(self, ex_mem, new_mem):
+        self._assert_in_rnn_block_('update_memory')
+        if not isinstance(ex_mem, Variable):
+            raise TypeError("The input arg `ex_mem` of update_memory() must "
+                            "be a Variable")
+        if not isinstance(new_mem, Variable):
+            raise TypeError("The input arg `new_mem` of update_memory() must "
+                            "be a Variable")
+
+        mem_array = self.mem_dict.get(ex_mem.name, None)
+        if mem_array is None:
+            raise ValueError("Please invoke memory before update_memory")
+        if self.lod_rank_table is None:
+            raise ValueError("Please invoke step_input before update_memory")
+
+        self.mem_link.append((new_mem, mem_array))
+
+    def output(self, *outputs):
+        self._assert_in_rnn_block_('output')
+        parent_block = self._parent_block_()
+        for each in outputs:
+            outside_array = parent_block.create_var(
+                name=unique_name("_".join(
+                    [self.helper.name, "output_array", each.name])),
+                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
+                dtype=each.dtype)
+            array_write(x=each, i=self.step_idx, array=outside_array)
+            self.output_array.append(outside_array)
+
+    def _parent_block_(self):
+        prog = self.helper.main_program
+        parent_idx = prog.current_block().parent_idx
+        assert parent_idx >= 0
+        parent_block = prog.block(parent_idx)
+
+        return parent_block
+
+    def _assert_in_rnn_block_(self, method):
+        if self.status != DynamicRNN.IN_RNN:
+            raise ValueError("{0} can only be invoked inside rnn block.".format(
+                method))
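
Taken together, `DynamicRNN` is a three-state machine (`BEFORE_RNN`, `IN_RNN`, `AFTER_RNN`) built on the `While` op: `step_input` converts a LoD tensor into a tensor array and reads one step per iteration, `memory`/`update_memory` thread state across iterations, and `output` collects per-step results that `__call__` returns as a LoD tensor after the block closes. Here is a minimal sketch of the user-facing flow, paraphrasing the unit test below; it assumes a fluid build containing this commit, and the vocabulary size 10000 is a placeholder (the test uses `len(self.word_dict)`):

```python
import paddle.v2.fluid as fluid

# Sequence input (LoD level 1) and its embedding, as in the test below.
sentence = fluid.layers.data(
    name='word', shape=[1], dtype='int64', lod_level=1)
sent_emb = fluid.layers.embedding(
    input=sentence, size=[10000, 32], dtype='float32')  # 10000 is a placeholder

rnn = fluid.layers.DynamicRNN()
with rnn.block():                       # opens the underlying While block
    word = rnn.step_input(sent_emb)     # the current time step of each sequence
    prev = rnn.memory(shape=[100], dtype='float32')  # state, zero-filled at t=0
    hidden = fluid.layers.fc(input=[word, prev], size=100, act='tanh')
    rnn.update_memory(prev, hidden)     # carry the new state into the next step
    rnn.output(hidden)                  # collect this step's output

# After the block closes, rnn() returns the outputs as a LoD tensor again.
last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
```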

python/paddle/v2/fluid/tests/test_dyn_rnn.py

Lines changed: 44 additions & 1 deletion

@@ -7,7 +7,7 @@
 class TestDynRNN(unittest.TestCase):
     def setUp(self):
         self.word_dict = paddle.dataset.imdb.word_dict()
-        self.BATCH_SIZE = 100
+        self.BATCH_SIZE = 2
         self.train_data = paddle.batch(
             paddle.dataset.imdb.train(self.word_dict),
             batch_size=self.BATCH_SIZE)
@@ -55,6 +55,7 @@ def test_plain_while_op(self):
             mem = fluid.layers.shrink_memory(x=mem, i=i, table=rank_table)

             hidden = fluid.layers.fc(input=[mem, ipt], size=100, act='tanh')
+
             fluid.layers.array_write(x=hidden, i=i, array=out)
             fluid.layers.increment(x=i, in_place=True)
             fluid.layers.array_write(x=hidden, i=i, array=mem_array)
@@ -82,6 +83,48 @@ def test_plain_while_op(self):
             print(val)
             self.assertFalse(numpy.isnan(val))

+    def test_train_dyn_rnn(self):
+        main_program = fluid.Program()
+        startup_program = fluid.Program()
+        with fluid.program_guard(main_program, startup_program):
+            sentence = fluid.layers.data(
+                name='word', shape=[1], dtype='int64', lod_level=1)
+            sent_emb = fluid.layers.embedding(
+                input=sentence, size=[len(self.word_dict), 32], dtype='float32')
+
+            rnn = fluid.layers.DynamicRNN()
+
+            with rnn.block():
+                in_ = rnn.step_input(sent_emb)
+                mem = rnn.memory(shape=[100], dtype='float32')
+                out_ = fluid.layers.fc(input=[in_, mem], size=100, act='tanh')
+                rnn.update_memory(mem, out_)
+                rnn.output(out_)
+
+            last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
+            logits = fluid.layers.fc(input=last, size=1, act=None)
+            label = fluid.layers.data(name='label', shape=[1], dtype='float32')
+            loss = fluid.layers.sigmoid_cross_entropy_with_logits(
+                x=logits, label=label)
+            loss = fluid.layers.mean(x=loss)
+            sgd = fluid.optimizer.Adam(1e-3)
+            sgd.minimize(loss=loss)
+
+        cpu = fluid.CPUPlace()
+        exe = fluid.Executor(cpu)
+        exe.run(startup_program)
+        feeder = fluid.DataFeeder(feed_list=[sentence, label], place=cpu)
+        data = next(self.train_data())
+        loss_0 = exe.run(main_program,
+                         feed=feeder.feed(data),
+                         fetch_list=[loss])[0]
+        for _ in xrange(100):
+            val = exe.run(main_program,
+                          feed=feeder.feed(data),
+                          fetch_list=[loss])[0]
+        # the loss should have decreased after 100 mini-batches
+        self.assertLess(val[0], loss_0[0])
+

 if __name__ == '__main__':
     unittest.main()
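
The new test also exercises the `DataFeeder` mentioned in the commit message: the reader yields mini-batches of `(word_ids, label)` rows and `feeder.feed(data)` converts one batch, including the LoD layout of the sequence input, into the feed dict that `Executor.run` expects. A hedged sketch of that loop in isolation, reusing the `sentence`, `label`, `loss`, `main_program`, and `exe` names defined in `test_train_dyn_rnn` above:

```python
import paddle.v2 as paddle
import paddle.v2.fluid as fluid

# Batched reader over the IMDB training set, as in setUp() above.
word_dict = paddle.dataset.imdb.word_dict()
train_data = paddle.batch(
    paddle.dataset.imdb.train(word_dict), batch_size=2)

feeder = fluid.DataFeeder(feed_list=[sentence, label], place=fluid.CPUPlace())
for data in train_data():
    # One optimization step per mini-batch, fed straight from the reader.
    loss_np = exe.run(main_program,
                      feed=feeder.feed(data),
                      fetch_list=[loss])[0]
```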
