Skip to content

Commit f23691d

Browse files
authored
Merge pull request #7434 from pkuyym/fix-7195
Add static_input for DynamicRNN
2 parents 535fefb + 25fee87 commit f23691d

File tree

2 files changed

+212
-0
lines changed

2 files changed

+212
-0
lines changed

python/paddle/v2/fluid/layers/control_flow.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1291,6 +1291,26 @@ def step_input(self, x):
12911291
outputs={'Out': input_array})
12921292
return array_read(array=input_array, i=self.step_idx)
12931293

1294+
def static_input(self, x):
    """Return the per-step view of ``x`` for use inside the RNN block.

    A new LOD_TENSOR variable is created in the parent block and filled by
    a ``reorder_lod_tensor_by_rank`` op that takes ``x`` and this RNN's
    ``lod_rank_table``. The reordered variable is then passed through
    ``shrink_memory`` together with the current ``step_idx`` and the rank
    table, and that result is returned.

    Args:
        x: the Variable to expose inside the RNN block.

    Returns:
        The output of ``shrink_memory`` for the reordered input.

    Raises:
        TypeError: if ``x`` is not a Variable.
        RuntimeError: if ``lod_rank_table`` is still None, i.e. no
            ``step_input()`` call has set it yet.
    """
    self._assert_in_rnn_block_("static_input")
    if not isinstance(x, Variable):
        raise TypeError(
            "static_input() can only take a Variable as its input")

    rank_table = self.lod_rank_table
    if rank_table is None:
        raise RuntimeError(
            "static_input() must be called after step_input().")

    # The reorder op and its output live in the parent block, not in the
    # RNN block itself.
    outer_block = self._parent_block_()
    reordered = outer_block.create_var(
        name=unique_name("dynamic_rnn_static_input_reordered"),
        type=core.VarDesc.VarType.LOD_TENSOR,
        dtype=x.dtype)
    op_inputs = {'X': [x], 'RankTable': [rank_table]}
    op_outputs = {'Out': [reordered]}
    outer_block.append_op(
        type='reorder_lod_tensor_by_rank',
        inputs=op_inputs,
        outputs=op_outputs)
    return shrink_memory(reordered, self.step_idx, rank_table)
1313+
12941314
@contextlib.contextmanager
12951315
def block(self):
12961316
if self.status != DynamicRNN.BEFORE_RNN:
Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import unittest
2+
import paddle.v2 as paddle
3+
import paddle.v2.fluid.core as core
4+
import paddle.v2.fluid as fluid
5+
from paddle.v2.fluid.backward import append_backward
6+
import paddle.v2.fluid.framework as framework
7+
from paddle.v2.fluid.framework import Program, switch_main_program
8+
import bisect
9+
import numpy as np
10+
11+
# Pin the random seed of the default startup program to a fixed value
# (presumably so parameter initialization is reproducible between runs).
fluid.default_startup_program().random_seed = 1
12+
13+
14+
class TestDyRnnStaticInput(unittest.TestCase):
    """Tests for ``DynamicRNN.static_input``.

    ``test_step_out`` compares the value observed inside the RNN block at
    every step with a NumPy reference; ``test_network_gradient`` compares
    the gradient of the static input with a central-difference estimate.
    """

    def setUp(self):
        """Create a fresh main program, both input tensors and an executor."""
        self._delta = 0.005
        self._max_sequence_len = 3
        self._program = Program()
        switch_main_program(self._program)
        self.output_dim = 10
        self.place = core.CPUPlace()
        self.prepare_x_tensor()
        self.prepare_static_input_tensor()
        self.exe = fluid.Executor(self.place)

    def prepare_x_tensor(self):
        """Build the step-input LoDTensor: random float32 rows, lod [0, 2, 3, 6]."""
        self.x_tensor_dim = 10
        lod = [[0, 2, 3, 6]]
        shape = [lod[0][-1], self.x_tensor_dim]
        self.x_tensor_data = np.random.random(shape).astype('float32')
        self.x_tensor = core.LoDTensor()
        self.x_tensor.set_lod(lod)
        self.x_tensor.set(self.x_tensor_data, self.place)

    def prepare_static_input_tensor(self):
        """Build the static-input LoDTensor: random float32 rows, lod [0, 1, 3, 6]."""
        self.static_input_tensor_dim = 4
        lod = [[0, 1, 3, 6]]
        shape = [lod[0][-1], self.static_input_tensor_dim]
        self.static_input_data = np.random.random(shape).astype('float32')
        self.static_input_tensor = core.LoDTensor()
        self.static_input_tensor.set_lod(lod)
        self.static_input_tensor.set(self.static_input_data, self.place)

    def fetch_value(self, var):
        """Run the program feeding both tensors; return ``(ndarray, lod)`` of ``var``."""
        fetch_outs = self.exe.run(
            feed={
                'x_tensor': self.x_tensor,
                'static_input_tensor': self.static_input_tensor
            },
            fetch_list=[var],
            return_numpy=False)
        return self._lodtensor_to_ndarray(fetch_outs[0])

    def _lodtensor_to_ndarray(self, lod_tensor):
        """Copy ``lod_tensor`` element by element into a float32 ndarray.

        Returns:
            A tuple ``(ndarray, lod)`` where ``lod`` is ``lod_tensor.lod()``.
        """
        dims = lod_tensor.get_dims()
        ndarray = np.zeros(shape=dims).astype('float32')
        # Take the flat view once instead of on every element.
        flat = ndarray.ravel()
        for i in range(int(np.prod(dims))):
            flat[i] = lod_tensor.get_float_element(i)
        return ndarray, lod_tensor.lod()

    def build_graph(self, only_forward=False):
        """Build the DynamicRNN network in the current main program.

        Args:
            only_forward: when True, the value returned by ``static_input``
                is written to an array at every step and no backward pass
                is appended.

        Returns:
            With ``only_forward=True``: a list of ``_max_sequence_len``
            variables read back from that array, one per step.
            Otherwise: ``(static_input_grad, loss)`` where
            ``static_input_grad`` is the gradient variable of
            ``'static_input_tensor'`` in the global block.
        """
        x_tensor = fluid.layers.data(
            name='x_tensor',
            shape=[self.x_tensor_dim],
            dtype='float32',
            lod_level=1)
        x_tensor.stop_gradient = False

        static_input_tensor = fluid.layers.data(
            name='static_input_tensor',
            shape=[self.static_input_tensor_dim],
            dtype='float32',
            lod_level=1)
        static_input_tensor.stop_gradient = False

        if only_forward:
            static_input_out_array = self._program.global_block().create_var(
                name='static_input_out_array',
                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                dtype='float32')
            static_input_out_array.stop_gradient = True

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_x = rnn.step_input(x_tensor)
            step_static_input = rnn.static_input(static_input_tensor)
            if only_forward:
                # Record what static_input yields at this step.
                fluid.layers.array_write(
                    x=step_static_input,
                    i=rnn.step_idx,
                    array=static_input_out_array)
            last = fluid.layers.sequence_pool(
                input=step_static_input, pool_type='last')
            projected = fluid.layers.fc(input=[step_x, last],
                                        size=self.output_dim)
            rnn.output(projected)

        if only_forward:
            static_input_step_outs = []
            step_idx = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=0)
            step_idx.stop_gradient = True

            for _ in range(self._max_sequence_len):
                step_out = fluid.layers.array_read(static_input_out_array,
                                                   step_idx)
                step_out.stop_gradient = True
                static_input_step_outs.append(step_out)
                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

            return static_input_step_outs

        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
        loss = fluid.layers.mean(x=last)
        append_backward(loss)
        static_input_grad = self._program.global_block().var(
            framework.grad_var_name('static_input_tensor'))
        return static_input_grad, loss

    def get_seq_len_from_lod(self, lod):
        """Return the sequence lengths encoded by the level-0 offsets of ``lod``."""
        offsets = lod[0]
        return [offsets[k + 1] - offsets[k] for k in range(len(offsets) - 1)]

    def get_expected_static_step_outs(self):
        """Compute the reference per-step static input with NumPy.

        The static-input sequences are reordered by descending length of
        the corresponding ``x`` sequences; at step ``t`` only the sequences
        whose ``x`` length is greater than ``t`` are kept.

        Returns:
            ``(static_step_outs, static_step_lods)``: two lists with one
            entry per step, holding the expected float32 data and the
            expected lod respectively.
        """
        x_seq_len = self.get_seq_len_from_lod(self.x_tensor.lod())
        x_seq_len_sorted = sorted(x_seq_len)
        # NOTE(review): reversing argsort orders equal-length sequences by
        # descending index; the fixture has no ties -- confirm against the
        # rank table's tie-breaking before adding any.
        x_sorted_indices = np.argsort(x_seq_len)[::-1]

        static_lod = self.static_input_tensor.lod()
        static_offsets = static_lod[0]
        static_sliced = [
            self.static_input_data[static_offsets[k]:static_offsets[k + 1]]
            for k in range(len(static_offsets) - 1)
        ]
        static_seq_len = self.get_seq_len_from_lod(static_lod)

        static_reordered = []
        static_seq_len_reordered = []
        for seq_idx in x_sorted_indices:
            static_reordered.extend(static_sliced[seq_idx].tolist())
            static_seq_len_reordered.append(static_seq_len[seq_idx])

        static_step_outs = []
        static_step_lods = []

        for step in range(self._max_sequence_len):
            # Number of x sequences that still have an element at this step.
            num_active = len(x_seq_len) - bisect.bisect_left(
                x_seq_len_sorted, step + 1)
            lod = [0]
            for seq_len in static_seq_len_reordered[:num_active]:
                lod.append(seq_len + lod[-1])
            static_step_lods.append([lod])
            num_rows = lod[-1]
            static_step_outs.append(
                np.array(static_reordered[:num_rows]).astype('float32'))

        return static_step_outs, static_step_lods

    def test_step_out(self):
        """The static input seen at every step matches the NumPy reference."""
        static_step_outs = self.build_graph(only_forward=True)
        self.exe.run(framework.default_startup_program())
        expected_outs, expected_lods = self.get_expected_static_step_outs()
        for step in range(self._max_sequence_len):
            step_out, lod = self.fetch_value(static_step_outs[step])
            self.assertTrue(np.allclose(step_out, expected_outs[step]))
            self.assertTrue(np.allclose(lod, expected_lods[step]))

    def test_network_gradient(self):
        """The static-input gradient matches a central-difference estimate."""
        static_input_grad, loss = self.build_graph()
        self.exe.run(framework.default_startup_program())

        actual_gradients, actual_lod = self.fetch_value(static_input_grad)

        static_input_shape = self.static_input_tensor.get_dims()
        numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
        flat_numeric = numeric_gradients.ravel()
        # calculate numeric gradients
        tensor_size = int(np.prod(static_input_shape))
        for i in range(tensor_size):
            origin = self.static_input_tensor.get_float_element(i)
            x_pos = origin + self._delta
            self.static_input_tensor.set_float_element(i, x_pos)
            y_pos = self.fetch_value(loss)[0][0]
            x_neg = origin - self._delta
            self.static_input_tensor.set_float_element(i, x_neg)
            y_neg = self.fetch_value(loss)[0][0]
            # Restore the element before perturbing the next one.
            self.static_input_tensor.set_float_element(i, origin)
            flat_numeric[i] = (y_pos - y_neg) / self._delta / 2
        # The third positional argument of np.allclose is rtol.
        self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
        self.assertTrue(np.allclose(actual_lod, self.static_input_tensor.lod()))
189+
190+
191+
# Run the test case when this file is executed directly.
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)