Commit 8cfb3e5

Add python wrapper for GRU
1 parent 9b1a17a commit 8cfb3e5

File tree: 2 files changed (+113, -0 lines)

doc/api/v2/fluid/layers.rst

Lines changed: 5 additions & 0 deletions

@@ -18,6 +18,11 @@ dynamic_lstm
.. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
    :noindex:

dynamic_gru
-----------
.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
    :noindex:

data
----
.. autofunction:: paddle.v2.fluid.layers.data

python/paddle/v2/fluid/layers/nn.py

Lines changed: 108 additions & 0 deletions

@@ -25,6 +25,7 @@
    'fc',
    'embedding',
    'dynamic_lstm',
    'dynamic_gru',
    'gru_unit',
    'linear_chain_crf',
    'crf_decoding',
@@ -366,6 +367,113 @@ def dynamic_lstm(input,
    return hidden, cell


def dynamic_gru(input,
                size,
                param_attr=None,
                bias_attr=None,
                is_reverse=False,
                gate_activation='sigmoid',
                candidate_activation='tanh',
                h_0=None):
    """
    **Dynamic GRU Layer**

    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_

    The formula is as follows:

    .. math::

        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)

        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)

        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}

    :math:`\odot` denotes the element-wise product of vectors. :math:`act_g`
    is the activation function of the update and reset gates, for which
    :math:`sigmoid` is usually used. :math:`act_c` is the activation
    function of the candidate hidden state, for which :math:`tanh` is
    usually used.

    Note that the :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations
    on the input :math:`x_{t}` are NOT included in this operator. Users can
    apply a fully-connected layer before the GRU layer to compute them.

    Args:
        input(Variable): The input of the dynamic_gru layer, which supports
            variable-length input sequences. The underlying tensor in this
            Variable is a matrix with shape :math:`(T \\times 3D)`, where
            :math:`T` is the total number of time steps in this mini-batch
            and :math:`D` is the hidden size.
        size(int): The dimension of the GRU cell.
        param_attr(ParamAttr|None): The parameter attribute for the
            learnable hidden-hidden weight matrix. Note:

            - The shape of the weight matrix is :math:`(D \\times 3D)`,
              where :math:`D` is the hidden size.
            - All elements in the weight matrix can be divided into two
              parts. The first part holds the weights of the update and
              reset gates with shape :math:`(D \\times 2D)`, and the second
              part holds the weights of the candidate hidden state with
              shape :math:`(D \\times D)`.
        bias_attr(ParamAttr): The parameter attribute for the learnable
            hidden-hidden bias.
        is_reverse(bool): Whether to compute the reversed GRU, default
            :attr:`False`.
        gate_activation(str): The activation for the update and reset gates.
            Choices = ["sigmoid", "tanh", "relu", "identity"],
            default "sigmoid".
        candidate_activation(str): The activation for the candidate hidden
            state. Choices = ["sigmoid", "tanh", "relu", "identity"],
            default "tanh".
        h_0(Variable|None): The initial hidden state. If not set, the
            hidden state starts from zeros.

    Returns:
        Variable: The hidden state of the GRU. The shape is
        :math:`(T \\times D)`, and the lod is the same as that of the input.

    Examples:
        .. code-block:: python

            hidden_dim = 512
            x = fluid.layers.fc(input=data, size=hidden_dim * 3)
            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
    """

    helper = LayerHelper('gru', **locals())
    dtype = helper.input_dtype()

    # Hidden-hidden weights: (D, 2D) for the gates plus (D, D) for the
    # candidate, stored as one (D, 3D) matrix.
    weight = helper.create_parameter(
        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True)
    inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
    if h_0 is not None:
        assert h_0.shape == (
            size, size), 'The shape of h0 should be (%d, %d)' % (size, size)
        inputs['H0'] = h_0

    hidden = helper.create_tmp_variable(dtype)
    batch_gate = helper.create_tmp_variable(dtype)
    batch_reset_hidden_prev = helper.create_tmp_variable(dtype)
    batch_hidden = helper.create_tmp_variable(dtype)

    helper.append_op(
        type='gru',
        inputs=inputs,
        outputs={
            'Hidden': hidden,
            'BatchGate': batch_gate,
            'BatchResetHiddenPrev': batch_reset_hidden_prev,
            'BatchHidden': batch_hidden
        },
        attrs={
            'is_reverse': is_reverse,
            'gate_activation': gate_activation,
            'activation': candidate_activation
        })
    return hidden


def gru_unit(input,
             hidden,
             size,

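To make the docstring's recurrence and the (D x 3D) weight layout concrete, here is a minimal NumPy sketch of a single GRU step. The helper gru_step and all array names are illustrative assumptions, not part of the Paddle API, and the default sigmoid/tanh activations are assumed.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(x_proj, h_prev, weight, bias):
    # x_proj: (3D,)   input already projected by W_{ux}, W_{rx}, W_{cx}
    #                 (the fc layer placed before dynamic_gru), laid out
    #                 as [update | reset | candidate].
    # h_prev: (D,)    previous hidden state h_{t-1}.
    # weight: (D, 3D) hidden-hidden weights; the first (D, 2D) block holds
    #                 the update/reset gate weights, the last (D, D) block
    #                 the candidate weights.
    # bias:   (3D,)   concatenation of b_u, b_r, b_c.
    D = h_prev.shape[0]
    W_gate, W_cand = weight[:, :2 * D], weight[:, 2 * D:]
    gates = sigmoid(x_proj[:2 * D] + h_prev @ W_gate + bias[:2 * D])
    u, r = gates[:D], gates[D:]  # u_t and r_t
    c = np.tanh(x_proj[2 * D:] + (r * h_prev) @ W_cand + bias[2 * D:])  # \tilde{h_t}
    return (1 - u) * h_prev + u * c  # h_t

Applying gru_step for t = 1, ..., T, feeding each output back as h_prev, yields the (T x D) hidden state sequence described under Returns.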