
Commit 17e3373

Enhance unit testing and fix bug.
1 parent 8bec26b commit 17e3373

6 files changed: +101 -62 lines changed


paddle/operators/lstm_op.h
Lines changed: 5 additions & 12 deletions

@@ -56,10 +56,6 @@ class LSTMKernel : public framework::OpKernel<T> {
     framework::DDim dims({in_dims[0], frame_size});

     if (bias) {
-      // framework::Tensor cpu_t;
-      // cpu_t.mutable_data<T>(in_dims, platform::CPUPlace());
-      // cpu_t.CopyFrom<T>(*batch_gate, platform::CPUPlace(),
-      //                   ctx.device_context());
       Eigen::array<int, 2> extents({{1, 4 * frame_size}});
       Eigen::array<int, 2> offsets({{0, 0}});
       auto b = EigenMatrix<T>::From(*bias);
@@ -105,14 +101,14 @@ class LSTMKernel : public framework::OpKernel<T> {
       int cur_batch_size = bend - bstart;

       if (n != 0) {
-        int pre_end = batch_lod[n - 1];
-        auto pre_hidden_t = batch_out.Slice<T>(pre_end, bstart);
+        int pre_h_start = batch_lod[n - 1];
+        int pre_h_end = pre_h_start + cur_batch_size;
+        auto pre_hidden_t = batch_out.Slice<T>(pre_h_start, pre_h_end);
         math::matmul<Place, T>(ctx.device_context(), pre_hidden_t, false,
                                *weight, false, static_cast<T>(1.0), &gate_t,
-                               static_cast<T>(0.0));
+                               static_cast<T>(1.0));
       }
-      // else if : how to pass the state from
-      // last mini-batch will be supported later
+      // else if : support the initial hidden and cell

       lstm_value.gateValue = gate_t.data<T>();
       lstm_value.outputValue = out_t.data<T>();
@@ -132,9 +128,6 @@ class LSTMKernel : public framework::OpKernel<T> {
     batch_cell.set_lod(batch_gate->lod());
     // restore the output cell state in LoDTensor from the batch cell
     to_seq(ctx.device_context(), batch_cell, *cell_out);
-
-    auto t = framework::EigenVector<T>::Flatten(*batch_gate);
-    t.device(ctx.GetEigenDevice<Place>()) = t.constant(static_cast<T>(0));
   }
 };
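The slice fix reads exactly `cur_batch_size` rows of the previous step's hidden output, and the last `math::matmul` argument (the GEMM beta) changes from 0.0 to 1.0, so the recurrent projection is accumulated into `gate_t`, which already holds the input projection plus bias. A minimal NumPy sketch of why the beta matters here, with hypothetical shapes:

```python
import numpy as np

D = 4                                              # hypothetical frame size
cur_batch_size = 3

gate_t = np.random.randn(cur_batch_size, 4 * D)    # already holds x.dot(W_x) + b
pre_hidden_t = np.random.randn(cur_batch_size, D)  # previous step's hidden output
weight = np.random.randn(D, 4 * D)                 # recurrent weight

# The GEMM computes gate_t = alpha * pre_hidden_t.dot(weight) + beta * gate_t.
# beta = 0.0 (the bug) discards the input projection already stored in gate_t:
buggy = pre_hidden_t.dot(weight) + 0.0 * gate_t
# beta = 1.0 (the fix) accumulates the recurrent term on top of it:
fixed = pre_hidden_t.dot(weight) + 1.0 * gate_t
```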

paddle/operators/math/detail/hl_gpu_functions.h
Lines changed: 5 additions & 1 deletion

@@ -30,7 +30,9 @@ __device__ static float sigmoid(const float a) {
 }

 __device__ static float tanh(const float a) {
-  return __fdividef(2.0f, (1.0f + __expf(-2.0f * a))) - 1.0f;
+  float tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
+  return __fdividef(2.0f, (1.0f + __expf(tmp))) - 1.0f;
 }

 __device__ static float linear(const float a) { return a; }
@@ -63,6 +65,8 @@ __device__ static double sigmoid(const double a) {
 }

 __device__ static double tanh(const double a) {
+  double tmp = -2.0 * a;
+  tmp = (tmp > EXP_MAX_INPUT) ? EXP_MAX_INPUT : tmp;
   return (2.0 / (1.0 + exp(-2.0 * a))) - 1.0;
 }
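The added clipping guards the exponential against overflow: for a large negative `a`, `-2.0 * a` is a large positive exponent, so it is capped at `EXP_MAX_INPUT` before `__expf` consumes the already-negated value. (Note the `double` overload gains the same `tmp` but its return statement still uses `exp(-2.0 * a)` unchanged.) A NumPy sketch of the identity being used, assuming a cap of 40.0 like Paddle's `EXP_MAX_INPUT`:

```python
import numpy as np

EXP_MAX_INPUT = 40.0  # assumed cap, mirroring the constant used in the diff

def safe_tanh(a):
    # tanh(a) = 2 / (1 + exp(-2a)) - 1; clip the exponent so exp cannot overflow
    tmp = np.minimum(-2.0 * a, EXP_MAX_INPUT)
    return 2.0 / (1.0 + np.exp(tmp)) - 1.0

x = np.array([-100.0, -1.0, 0.0, 1.0, 100.0])
print(np.allclose(safe_tanh(x), np.tanh(x)))  # True
```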

paddle/operators/math/detail/lstm_gpu_kernel.h
Lines changed: 4 additions & 2 deletions

@@ -205,11 +205,13 @@ void gpu_lstm_forward(const platform::DeviceContext& context, Op op,
   if (batchSize == 1) {
     KeLstmForward<T, Op,
                   /* isBatch= */ false><<<grid, threads, 0, stream>>>(
-        op, value, frameSize, batchSize, active_node, active_gate, active_gate);
+        op, value, frameSize, batchSize, active_node, active_gate,
+        active_state);
   } else {
     KeLstmForward<T, Op,
                   /* isBatch= */ true><<<grid, threads, 0, stream>>>(
-        op, value, frameSize, batchSize, active_node, active_gate, active_gate);
+        op, value, frameSize, batchSize, active_node, active_gate,
+        active_state);
   }
 }
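The bug passed `active_gate` twice, so the kernel applied the gate activation where the cell-output (state) activation belongs. A sketch of a single LSTM element step showing the three distinct roles; the function and parameter names here are illustrative, not the kernel's API:

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_element(g_in, g_i, g_f, g_o, c_prev,
                 act_node=np.tanh,    # candidate activation (active_node)
                 act_gate=sigmoid,    # input/forget/output gates (active_gate)
                 act_state=np.tanh):  # cell output activation (active_state)
    c = act_gate(g_f) * c_prev + act_gate(g_i) * act_node(g_in)
    # the duplicated argument silently replaced act_state with act_gate here:
    h = act_gate(g_o) * act_state(c)
    return h, c
```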

paddle/operators/math/lstm_compute.h
Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ inline activation_mode_t ActiveType(const std::string &type) {
     return HL_ACTIVATION_RELU;
   } else if (type == "tanh") {
     return HL_ACTIVATION_TANH;
-  } else if (type == "linear" || type == "") {
+  } else if (type == "linear" || type == "identity" || type == "") {
     return HL_ACTIVATION_LINEAR;
   } else {
     PADDLE_THROW("Do not support activation type.");
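`ActiveType` now accepts "identity" as an alias for "linear" (and the empty string), matching the name the Python tests use. A dict-based Python sketch of the same mapping, with the enum values spelled as strings:

```python
ACTIVE_TYPES = {
    "relu": "HL_ACTIVATION_RELU",
    "tanh": "HL_ACTIVATION_TANH",
    "linear": "HL_ACTIVATION_LINEAR",
    "identity": "HL_ACTIVATION_LINEAR",  # new alias for linear
    "": "HL_ACTIVATION_LINEAR",
}

def active_type(type_str):
    try:
        return ACTIVE_TYPES[type_str]
    except KeyError:
        # mirrors the PADDLE_THROW branch for unknown names
        raise ValueError("Do not support activation type.")
```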

python/paddle/v2/framework/tests/op_test.py
Lines changed: 2 additions & 2 deletions

@@ -242,15 +242,15 @@ def check_output_with_place(self, place, atol):
                     self.assertTrue(
                         np.allclose(
                             actual, expect, atol=atol),
-                        "output name: " + out_name + " has diff.")
+                        "Output (" + out_name + ") has diff at " + str(place))
             else:
                 actual = np.array(self.scope.find_var(out_name).get_tensor())
                 expect = self.outputs[out_name]

                 self.assertTrue(
                     np.allclose(
                         actual, expect, atol=atol),
-                    "output name: " + out_name + " has diff.")
+                    "Output (" + out_name + ") has diff at " + str(place))

     def check_output(self, atol=1e-5):
         places = [core.CPUPlace()]
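Since `check_output` runs the same comparison for every available place, embedding `str(place)` in the message identifies which device produced the mismatch. A sketch of that loop, consistent with the `places = [core.CPUPlace()]` context line above; the GPU branch is an assumption based on the framework's API of that era:

```python
import paddle.v2.framework.core as core

def check_all_places(op_test, atol=1e-5):
    # mirrors check_output: start with the CPU, add the GPU when available
    places = [core.CPUPlace()]
    if core.is_compile_gpu():          # assumed helper for CUDA builds
        places.append(core.GPUPlace(0))
    for place in places:
        # a failure now reads e.g. "Output (Hidden) has diff at CPUPlace"
        op_test.check_output_with_place(place, atol)
```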

python/paddle/v2/framework/tests/test_lstm_op.py
Lines changed: 84 additions & 44 deletions

@@ -28,6 +28,14 @@ def relu(x):
     return np.maximum(x, 0)


+ACTVATION = {
+    'identity': identity,
+    'sigmoid': sigmoid,
+    'tanh': tanh,
+    'relu': relu
+}
+
+
 def lstm(
         input,  # T x 4D
         lod,  # 1 x N
@@ -37,37 +45,45 @@ def lstm(
         w_b=None,  # 1 x 4D
         w_c=None,  # 1 x 3D
         is_reverse=False,
-        gate_act=None,
-        cell_act=None,
-        cand_act=None):
-    def _step(x, w_h, w_c, h_pre, c_pre, gate_act, cell_act, cand_act):
+        act_gate=None,
+        act_cell=None,
+        act_cand=None):
+    def _step(x, w_h, w_c, h_pre, c_pre, act_gate, act_cell, act_cand):
         g = np.dot(h_pre, w_h)  # 1 x 4D
         g = g + x
         g = np.reshape(g, (1, g.size))
         c_tmp, g_i, g_f, g_o = np.split(g, 4, axis=1)
         if w_c is None:
-            g_i = gate_act(g_i)  # 1 x D
-            g_f = gate_act(g_f)  # 1 x D
+            g_i = act_gate(g_i)  # 1 x D
+            g_f = act_gate(g_f)  # 1 x D
         else:
             w_ic, w_fc, w_oc = np.split(w_c, 3, axis=1)
-            g_i = gate_act(g_i + w_ic * c_pre)  # 1 x D
-            g_f = gate_act(g_f + w_fc * c_pre)  # 1 x D
-        c = g_f * c_pre + g_i * cand_act(c_tmp)  # 1 x D
+            g_i = act_gate(g_i + w_ic * c_pre)  # 1 x D
+            g_f = act_gate(g_f + w_fc * c_pre)  # 1 x D
+        c = g_f * c_pre + g_i * act_cand(c_tmp)  # 1 x D

         if w_c is None:
-            g_o = gate_act(g_o)  # 1 x D
+            g_o = act_gate(g_o)  # 1 x D
         else:
             _, _, w_oc = np.split(w_c, 3, axis=1)
-            g_o = gate_act(g_o + w_oc * c)  # 1 x D
-        h = g_o * cell_act(c)
-        bg = np.concatenate((cand_act(c_tmp), g_i, g_f, g_o), axis=1)
+            g_o = act_gate(g_o + w_oc * c)  # 1 x D
+        h = g_o * act_cell(c)
+        bg = np.concatenate((act_cand(c_tmp), g_i, g_f, g_o), axis=1)
         return h, c, bg

+    def _reverse(x, lod):
+        y = np.zeros_like(x)
+        for i in range(len(lod) - 1):
+            b, e = lod[i], lod[i + 1]
+            y[b:e, :] = np.flip(x[b:e, :], 0)
+        return y
+
     offset = lod[0]
     batch_size = len(offset) - 1
     hidden = []
     cell = []
     gate = []
+    input = _reverse(input, offset) if is_reverse else input
     if w_b is not None:
         input = input + np.tile(w_b, (offset[-1], 1))
     for i in range(batch_size):
@@ -78,47 +94,62 @@ def _step(x, w_h, w_c, h_pre, c_pre, gate_act, cell_act, cand_act):
         c_pre = c0[i]  # 1 x D
         for j in range(seq_len):
             # compute one step
-            h_pre, c_pre, g_pre = _step(x[j], w_h, w_c, h_pre, c_pre, gate_act,
-                                        cell_act, cand_act)
+            h_pre, c_pre, g_pre = _step(x[j], w_h, w_c, h_pre, c_pre, act_gate,
+                                        act_cell, act_cand)
             hidden.append(h_pre.flatten())
             cell.append(c_pre.flatten())
             gate.append(g_pre.flatten())

     hidden = np.array(hidden).astype("float64")
     cell = np.array(cell).astype("float64")
     gate = np.array(gate).astype("float64")
+
+    hidden = _reverse(hidden, offset) if is_reverse else hidden
+    cell = _reverse(cell, offset) if is_reverse else cell
+
     assert gate.shape == input.shape
     assert hidden.shape == (input.shape[0], input.shape[1] / 4)
     assert cell.shape == (input.shape[0], input.shape[1] / 4)
     return hidden, cell, gate


-class LstmUnitTest(OpTest):
+class TestLstmOp(OpTest):
     def set_data(self):
-        D = 4
-        #lod = [[0, 2, 6, 9]]
-        lod = [[0, 1]]
-        shape = (1, D)
-
-        x = np.random.normal(size=(1, 4 * D)).astype("float64")
-        h0 = np.zeros((4, D)).astype("float64")
-        c0 = np.zeros((4, D)).astype("float64")
-        w = np.random.normal(size=(D, 4 * D)).astype("float64")
-        b = np.random.normal(size=(1, 7 * D)).astype("float64")
-
-        w_b = b[:, 0:4 * D]
-        w_c = b[:, 4 * D:]
-        #h, c, g = lstm(x, lod, h0, c0, w, w_b, w_c, False, sigmoid, tanh, tanh)
-        h, c, g = lstm(x, lod, h0, c0, w, w_b, w_c, False, identity, identity,
-                       identity)
+        self.lod = [[0, 2, 6, 9]]
+        self.D = 64
+        self.sort_idx = [2, 6, 0, 3, 7, 1, 4, 8, 5]
+
+        self.act_gate = "sigmoid"
+        self.act_cell = "tanh"
+        self.act_cand = "tanh"
+
+        self.is_reverse = False
+
+    def setUp(self):
+        self.set_data()
+        self.op_type = "lstm"
+
+        T = self.lod[0][-1]
+        N = len(self.lod[0]) - 1
+
+        x = np.random.normal(size=(T, 4 * self.D)).astype("float64")
+        h0 = np.zeros((N, self.D)).astype("float64")
+        c0 = np.zeros((N, self.D)).astype("float64")
+        w = np.random.normal(size=(self.D, 4 * self.D)).astype("float64")
+        b = np.random.normal(size=(1, 7 * self.D)).astype("float64")
+
+        w_b = b[:, 0:4 * self.D]
+        w_c = b[:, 4 * self.D:]
+        h, c, g = lstm(x, self.lod, h0, c0, w, w_b, w_c, self.is_reverse,
+                       ACTVATION[self.act_gate], ACTVATION[self.act_cell],
+                       ACTVATION[self.act_cand])

         g_sort = np.zeros_like(x)
-        #idx = [2,6,0,3,7,1,4,8,5]
-        #for i, j in enumerate(idx):
-        #    g_sort[i, :] = g[j, :]
+        for i, j in enumerate(self.sort_idx):
+            g_sort[i, :] = g[j, :]

         self.inputs = {
-            'Input': (x, lod),
+            'Input': (x, self.lod),
             'H0': h0,
             'C0': c0,
             'Weight': w,
@@ -127,19 +158,28 @@ def set_data(self):
         self.outputs = {'Hidden': h, 'Cell': c, 'BatchGate': g_sort}
         self.attrs = {
             'usePeepholes': True,
-            'isReverse': False,
-            'gateActivation': 'linear',
-            'cellActivation': 'linear',
-            'candidateActivation': 'linear'
+            'isReverse': self.is_reverse,
+            'gateActivation': 'sigmoid',
+            'cellActivation': 'tanh',
+            'candidateActivation': 'tanh'
         }

-    def setUp(self):
-        self.set_data()
-        self.op_type = "lstm"
-
     def test_check_output(self):
         self.check_output()


+class TestLstmOpRerverse(TestLstmOp):
+    def set_data(self):
+        self.lod = [[0, 2, 6, 9]]
+        self.D = 64
+        self.sort_idx = [2, 6, 0, 3, 7, 1, 4, 8, 5]
+
+        self.act_gate = "sigmoid"
+        self.act_cell = "tanh"
+        self.act_cand = "tanh"
+
+        self.is_reverse = True
+
+
 if __name__ == "__main__":
     unittest.main()
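`self.sort_idx` hard-codes the sequence-to-batch reordering the operator applies to `BatchGate`: sequences are ordered longest first and interleaved one time step at a time. The sketch below derives that index from the LoD offsets; the derivation is an assumption about the operator's batching, but it reproduces the hard-coded `[2, 6, 0, 3, 7, 1, 4, 8, 5]` for `lod = [[0, 2, 6, 9]]`:

```python
def batch_sort_index(offset):
    # offset = lod[0]; [0, 2, 6, 9] describes sequences of length 2, 4 and 3
    seqs = [(offset[i + 1] - offset[i], offset[i])
            for i in range(len(offset) - 1)]
    seqs.sort(key=lambda s: -s[0])      # longest sequence first
    idx = []
    for step in range(seqs[0][0]):      # walk time steps across all sequences
        for length, start in seqs:
            if step < length:
                idx.append(start + step)
    return idx

print(batch_sort_index([0, 2, 6, 9]))   # [2, 6, 0, 3, 7, 1, 4, 8, 5]
```

Ordering longest-first makes every batch step a contiguous prefix of the previous one, which is what lets `lstm_op.h` slice `pre_hidden_t` with a plain `[pre_h_start, pre_h_end)` range.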
