@@ -227,6 +227,106 @@ def dynamic_lstm(input,
227
227
cell_activation = 'tanh' ,
228
228
candidate_activation = 'tanh' ,
229
229
dtype = 'float32' ):
230
+ """
231
+ **Dynamic LSTM Layer**
232
+
233
+ The default implementation uses diagonal/peephole connections
234
+ (https://arxiv.org/pdf/1402.1128.pdf), the formula is as follows:
235
+
236
+ .. math::
237
+
238
+ i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i) \\
239
+
240
+ f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f) \\
241
+
242
+ \tilde{c_t} = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c) \\
243
+
244
+ o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o) \\
245
+
246
+ c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c_t} \\
247
+
248
+ h_t = o_t \odot act_h(c_t)
249
+
250
+ where the W terms denote weight matrices (e.g. $W_{xi}$ is the matrix
251
+ of weights from the input gate to the input), $W_{ic}, W_{fc}, W_{oc}$
252
+ are diagonal weight matrices for peephole connections. In our implementation,
253
+ we use vectors to represent these diagonal weight matrices. The b terms
254
+ denote bias vectors ($b_i$ is the input gate bias vector), $\sigma$
255
+ is the non-linear activation function, such as the logistic sigmoid function, and
256
+ $i, f, o$ and $c$ are the input gate, forget gate, output gate,
257
+ and cell activation vectors, respectively, all of which have the same size as
258
+ the cell output activation vector $h$.
259
+
260
+ The $\odot$ is the element-wise product of the vectors. $act_g$ and $act_h$
261
+ are the cell input and cell output activation functions and `tanh` is usually
262
+ used for them. $\tilde{c_t}$ is also called the candidate hidden state,
263
+ which is computed based on the current input and the previous hidden state.
264
+
265
+ Set `use_peepholes` False to disable peephole connection. The formula
266
+ is omitted here, please refer to the paper
267
+ http://www.bioinf.jku.at/publications/older/2604.pdf for details.
268
+
269
+ Note that these $W_{xi}x_{t}, W_{xf}x_{t}, W_{xc}x_{t}, W_{xo}x_{t}$
270
+ operations on the input $x_{t}$ are NOT included in this operator.
271
+ Users can choose to use a fully-connected operator before the LSTM operator.
272
+
273
+ Args:
274
284
+ input(Variable): The input of dynamic_lstm layer, which support
285
+ variable-time length input sequence. The underlying tensor in
286
+ this Variable is a matrix with shape (T X 4D), where T is the
287
+ total time steps in this mini-batch, D is the hidden size.
288
+ size(int): The size of the input, which equals 4 * hidden size (D).
289
+ param_attr(ParamAttr): The parameter attribute for the learnable
290
+ hidden-hidden weights.
291
+ - The shape is (D x 4D), where D is the hidden size.
292
+ - param_attr = {W_ch, W_ih, W_fh, W_oh}
293
+ bias_attr(ParamAttr): The bias attribute for the learnable bias
294
+ weights, which contains two parts: input-hidden bias weight
295
+ and peephole connections weight if setting `use_peepholes` to True.
296
+ 1. `use_peepholes = False`
297
+ - The shape is (1 x 4D).
298
+ - Bias = {b_c, b_i, b_f, b_o}.
299
+ 2. `use_peepholes = True`
300
+ - The shape is (1 x 7D).
301
+ - Bias = {b_c, b_i, b_f, b_o, W_ic, W_fc, W_oc}.
302
+ use_peepholes(bool, default: True): whether to enable diagonal/peephole
303
+ connections.
304
+ is_reverse(bool, default: False): whether to compute reversed LSTM.
305
+ gate_activation(string, choices: "sigmoid", "tanh", "relu", "identity",
306
+ default: "sigmoid"): The activation for input gate, forget gate and
307
+ output gate.
308
+ cell_activation(string, choices: "sigmoid", "tanh", "relu", "identity",
309
+ default: "tanh"): The activation for cell output.
310
+ candidate_activation(string, choices: "sigmoid", "tanh", "relu",
311
+ "identity", default: "tanh"): The activation for candidate hidden
312
+ state.
313
+ dtype(string, default: 'float32'): The data type of this layer's
+ parameters and computation.
314
+
315
+ Returns:
316
+ hidden(Variable): the hidden state of LSTM layer. The shape is (T x D),
317
+ and lod is the same with the `input`.
318
+ cell(Variable): the cell state of LSTM layer. The shape is (T x D), and
319
+ lod is the same with the `input`.
320
+
321
+ Example:
322
+ .. code-block:: python
323
+
324
+ hidden_dim = 512
325
+ forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
326
+ act='tanh', bias_attr=True)
327
+ forward, _ = fluid.layers.dynamic_lstm(
328
+ input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
329
+ """
230
330
helper = LayerHelper ('lstm' , ** locals ())
231
331
size = size / 4
232
332
weight = helper .create_parameter (
0 commit comments