
Commit c648244

Merge pull request #7766 from guoshengCS/add-python-GRU

Add python wrapper for GRU

2 parents b455502 + 8cfb3e5

2 files changed (+113, -0)


doc/api/v2/fluid/layers.rst

Lines changed: 5 additions & 0 deletions
@@ -18,6 +18,11 @@ dynamic_lstm
 .. autofunction:: paddle.v2.fluid.layers.dynamic_lstm
     :noindex:

+dynamic_gru
+-----------
+.. autofunction:: paddle.v2.fluid.layers.dynamic_gru
+    :noindex:
+
 data
 ----
 .. autofunction:: paddle.v2.fluid.layers.data

python/paddle/v2/fluid/layers/nn.py

Lines changed: 108 additions & 0 deletions
@@ -26,6 +26,7 @@
     'fc',
     'embedding',
     'dynamic_lstm',
+    'dynamic_gru',
     'gru_unit',
     'linear_chain_crf',
     'crf_decoding',

@@ -368,6 +369,113 @@ def dynamic_lstm(input,
     return hidden, cell


+def dynamic_gru(input,
+                size,
+                param_attr=None,
+                bias_attr=None,
+                is_reverse=False,
+                gate_activation='sigmoid',
+                candidate_activation='tanh',
+                h_0=None):
+    """
+    **Dynamic GRU Layer**
+
+    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
+    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
+
+    The formula is as follows:
+
+    .. math::
+
+        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)
+
+        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
+
+        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
+
+        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}
+
+    where :math:`\odot` denotes the element-wise product of two vectors.
+    :math:`act_g` is the activation function of the update gate and reset
+    gate, for which :math:`sigmoid` is usually used. :math:`act_c` is the
+    activation function of the candidate hidden state, for which
+    :math:`tanh` is usually used.
+
+    Note that the input-to-hidden projections :math:`W_{ux}x_{t}, W_{rx}x_{t},
+    W_{cx}x_{t}` are NOT included in this operator. Users can apply a
+    fully-connected layer before the GRU layer to compute them.
+
+    Args:
+        input(Variable): The input of the dynamic_gru layer, which supports
+            variable-length input sequences. The underlying tensor in this
+            Variable is a matrix with shape :math:`(T \\times 3D)`, where
+            :math:`T` is the total number of time steps in this mini-batch
+            and :math:`D` is the hidden size.
+        size(int): The dimension of the GRU cell.
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
+            hidden-hidden weight matrix. Note:
+
+            - The shape of the weight matrix is :math:`(D \\times 3D)`, where
+              :math:`D` is the hidden size.
+            - All elements in the weight matrix can be divided into two parts.
+              The first part holds the weights of the update gate and reset
+              gate, with shape :math:`(D \\times 2D)`; the second part holds
+              the weights of the candidate hidden state, with shape
+              :math:`(D \\times D)`.
+        bias_attr(ParamAttr): The parameter attribute for the learnable
+            hidden-hidden bias.
+        is_reverse(bool): Whether to compute the reversed GRU, default
+            :attr:`False`.
+        gate_activation(str): The activation for the update gate and reset
+            gate. Choices = ["sigmoid", "tanh", "relu", "identity"], default
+            "sigmoid".
+        candidate_activation(str): The activation for the candidate hidden
+            state. Choices = ["sigmoid", "tanh", "relu", "identity"], default
+            "tanh".
+
+    Returns:
+        Variable: The hidden state of the GRU, with shape :math:`(T \\times D)`
+            and the same lod as the input.
+
+    Examples:
+        .. code-block:: python
+
+            hidden_dim = 512
+            x = fluid.layers.fc(input=data, size=hidden_dim * 3)
+            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
+    """
+
+    helper = LayerHelper('gru', **locals())
+    dtype = helper.input_dtype()
+
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    bias = helper.create_parameter(
+        attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True)
+    inputs = {'Input': input, 'Weight': weight, 'Bias': bias}
+    if h_0 is not None:
+        # The batch size is not known statically here, so only the width of
+        # the initial hidden state is checked.
+        assert h_0.shape[-1] == size, \
+            'The last dimension of h0 should be %d' % size
+        # The gru operator takes the initial hidden state under the input
+        # name 'H0'.
+        inputs['H0'] = h_0
+
+    hidden = helper.create_tmp_variable(dtype)
+    batch_gate = helper.create_tmp_variable(dtype)
+    batch_reset_hidden_prev = helper.create_tmp_variable(dtype)
+    batch_hidden = helper.create_tmp_variable(dtype)
+
+    helper.append_op(
+        type='gru',
+        inputs=inputs,
+        outputs={
+            'Hidden': hidden,
+            'BatchGate': batch_gate,
+            'BatchResetHiddenPrev': batch_reset_hidden_prev,
+            'BatchHidden': batch_hidden
+        },
+        attrs={
+            'is_reverse': is_reverse,
+            'gate_activation': gate_activation,
+            'activation': candidate_activation
+        })
+    return hidden
+
+
 def gru_unit(input,
              hidden,
              size,
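To make the formulas in the docstring concrete, here is a minimal NumPy sketch of one step of the recurrence, under the same conventions as the new layer: the input is already projected to width 3D by an fc layer, and the (D, 3D) hidden-hidden weight is split into an update/reset block and a candidate block as described in the param_attr note. The function and variable names are invented for this illustration and are not part of the commit.

```python
import numpy as np


def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))


def gru_step(x_proj, h_prev, w, b):
    """One GRU step; x_proj is x_t already projected to width 3D."""
    d = h_prev.shape[1]
    x = x_proj + b
    # u_t = act_g(W_ux x_t + W_uh h_{t-1} + b_u)
    # r_t = act_g(W_rx x_t + W_rh h_{t-1} + b_r)
    gates = sigmoid(x[:, :2 * d] + h_prev.dot(w[:, :2 * d]))
    u, r = gates[:, :d], gates[:, d:]
    # \tilde{h}_t = act_c(W_cx x_t + W_ch (r_t . h_{t-1}) + b_c)
    h_cand = np.tanh(x[:, 2 * d:] + (r * h_prev).dot(w[:, 2 * d:]))
    # h_t = (1 - u_t) . h_{t-1} + u_t . \tilde{h}_t
    return (1.0 - u) * h_prev + u * h_cand


batch, d = 4, 8
h = gru_step(
    np.random.randn(batch, 3 * d),  # fc output for one time step
    np.zeros((batch, d)),           # initial hidden state h_0
    np.random.randn(d, 3 * d),      # hidden-hidden weight, (D x 3D)
    np.zeros(3 * d))                # bias, broadcast here as (3D,)
assert h.shape == (batch, d)
```

The real gru operator fuses all time steps of a LoD batch into one kernel, and its in-memory gate ordering inside the fused weight may differ from this sketch, so treat it only as a reading aid for the math.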

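Finally, a slightly fuller version of the docstring's usage example, as a hedged sketch: the data-reading and embedding setup (the input name word, the vocabulary size, and the embedding width) is assumed for illustration and is not part of this diff.

```python
import paddle.v2.fluid as fluid

hidden_dim = 512

# Variable-length sequences of word ids (lod_level=1); the shape, dtype,
# and vocabulary numbers below are assumptions for this sketch.
data = fluid.layers.data(
    name='word', shape=[1], dtype='int64', lod_level=1)
emb = fluid.layers.embedding(input=data, size=[10000, 256])

# dynamic_gru does NOT compute the input-to-hidden projections, so the
# input must first be projected to 3 * hidden_dim, e.g. with an fc layer.
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)

# hidden: shape (T x hidden_dim), with the same lod as the input sequence.
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
```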