@@ -206,6 +206,102 @@ def dynamic_lstm(input,
                  cell_activation='tanh',
                  candidate_activation='tanh',
                  dtype='float32'):
+    """
+    **Dynamic LSTM Layer**
+
+    The default implementation uses diagonal/peephole connections
+    (https://arxiv.org/pdf/1402.1128.pdf); the formula is as follows:
+
+    .. math::
+
+        i_t & = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + W_{ic}c_{t-1} + b_i)
+
+        f_t & = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + W_{fc}c_{t-1} + b_f)
+
+        \\tilde{c_t} & = act_g(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
+
+        o_t & = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + W_{oc}c_t + b_o)
+
+        c_t & = f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
+
+        h_t & = o_t \odot act_h(c_t)
+
+    where the :math:`W` terms denote weight matrices (e.g. :math:`W_{ix}` is
+    the matrix of weights from the input to the input gate), :math:`W_{ic}, \
+    W_{fc}, W_{oc}` are diagonal weight matrices for peephole connections. In
+    our implementation, we use vectors to represent these diagonal weight
+    matrices. The :math:`b` terms denote bias vectors (:math:`b_i` is the input
+    gate bias vector), :math:`\sigma` is the non-linear activation, such as the
+    logistic sigmoid function, and :math:`i, f, o` and :math:`c` are the input
+    gate, forget gate, output gate, and cell activation vectors, respectively,
+    all of which have the same size as the cell output activation vector :math:`h`.
+
+    :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
+    and :math:`act_h` are the cell input and cell output activation functions,
+    for which `tanh` is usually used. :math:`\\tilde{c_t}` is also called the
+    candidate hidden state, which is computed from the current input and the
+    previous hidden state.
+
+    Set `use_peepholes` to `False` to disable peephole connections. The
+    formula is omitted here; please refer to the paper
+    http://www.bioinf.jku.at/publications/older/2604.pdf for details.
+
+    Note that the :math:`W_{ix}x_{t}, W_{fx}x_{t}, W_{cx}x_{t}, W_{ox}x_{t}`
+    operations on the input :math:`x_{t}` are NOT included in this operator.
+    Users can choose to apply a fully-connected layer before the LSTM layer;
+    see the sketch and the example below.
+
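+    For illustration only, here is a minimal NumPy sketch of a single
+    timestep under the equations above. It assumes the four input
+    projections are precomputed (as this operator expects) and passes the
+    peephole vectors separately; the names and the gate ordering are
+    hypothetical, not this layer's actual API or parameter layout.
+
+    .. code-block:: python
+
+        import numpy as np
+
+        def sigmoid(x):
+            return 1.0 / (1.0 + np.exp(-x))
+
+        def lstm_step(x_proj, h_prev, c_prev, w_h, w_ic, w_fc, w_oc, bias):
+            # x_proj: (4D,) precomputed input projections; w_h: (D, 4D)
+            # recurrent weights; w_ic, w_fc, w_oc: (D,) peephole vectors.
+            z = x_proj + h_prev.dot(w_h) + bias   # shape (4D,)
+            z_i, z_f, z_c, z_o = np.split(z, 4)
+            i = sigmoid(z_i + w_ic * c_prev)      # input gate
+            f = sigmoid(z_f + w_fc * c_prev)      # forget gate
+            c_tilde = np.tanh(z_c)                # candidate hidden state
+            c = f * c_prev + i * c_tilde          # new cell state
+            o = sigmoid(z_o + w_oc * c)           # output gate peeks at c_t
+            h = o * np.tanh(c)                    # new hidden state
+            return h, c
+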
+    Args:
+        input(Variable): The input of dynamic_lstm layer, which supports
+                         variable-time length input sequence. The underlying
+                         tensor in this Variable is a matrix with shape
+                         (T x 4D), where T is the total time steps in this
+                         mini-batch and D is the hidden size.
+        size(int): 4 * hidden size.
+        param_attr(ParamAttr): The parameter attribute for the learnable
+                               hidden-hidden weights.
+
+                               - The shape is (D x 4D), where D is the hidden
+                                 size.
+                               - Weights = {:math:`W_{ch}, W_{ih}, \
+                                 W_{fh}, W_{oh}`}
+        bias_attr(ParamAttr): The bias attribute for the learnable bias
+                              weights, which contains two parts, input-hidden
+                              bias weights and peephole connections weights if
+                              setting `use_peepholes` to `True`.
+
+                              1. `use_peepholes = False`
+                                  - The shape is (1 x 4D).
+                                  - Biases = {:math:`b_c, b_i, b_f, b_o`}.
+                              2. `use_peepholes = True`
+                                  - The shape is (1 x 7D).
+                                  - Biases = {:math:`b_c, b_i, b_f, b_o, W_{ic}, \
+                                    W_{fc}, W_{oc}`}.
+        use_peepholes(bool): Whether to enable diagonal/peephole connections,
+                             default `True`.
+        is_reverse(bool): Whether to compute reversed LSTM, default `False`.
+        gate_activation(str): The activation for input gate, forget gate and
+                              output gate. Choices = ["sigmoid", "tanh", "relu",
+                              "identity"], default "sigmoid".
+        cell_activation(str): The activation for cell output. Choices = ["sigmoid",
+                              "tanh", "relu", "identity"], default "tanh".
+        candidate_activation(str): The activation for candidate hidden state.
+                                   Choices = ["sigmoid", "tanh", "relu",
+                                   "identity"], default "tanh".
+        dtype(str): Data type. Choices = ["float32", "float64"], default "float32".
+
+    Returns:
+        tuple: The hidden state and the cell state of the LSTM. The shape of \
+               both is (T x D), and the lod is the same as that of the `input`.
+
+    Examples:
+        .. code-block:: python
+
+            hidden_dim = 512
+            forward_proj = fluid.layers.fc(input=input_seq, size=hidden_dim * 4,
+                                           act=None, bias_attr=None)
+            forward, _ = fluid.layers.dynamic_lstm(
+                input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
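+
+            # Illustrative: a reversed pass over the same projected sequence
+            # via `is_reverse=True` (a common half of a bidirectional setup).
+            backward, _ = fluid.layers.dynamic_lstm(
+                input=forward_proj, size=hidden_dim * 4,
+                use_peepholes=False, is_reverse=True)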
+    """
     helper = LayerHelper('lstm', **locals())
     size = size / 4
     weight = helper.create_parameter(