Commit de89b47

Merge pull request #7575 from pkuyym/fix-7555
Add python wrapper for row conv operator.
2 parents: 9609c17 + 630a864 · commit de89b47

File tree: 3 files changed, +88 −21 lines

doc/api/v2/fluid/layers.rst

Lines changed: 5 additions & 0 deletions
@@ -529,3 +529,8 @@ sequence_reshape
 ----------------
 .. autofunction:: paddle.v2.fluid.layers.sequence_reshape
     :noindex:
+
+row_conv
+--------
+.. autofunction:: paddle.v2.fluid.layers.row_conv
+    :noindex:

python/paddle/v2/fluid/layers/nn.py

Lines changed: 75 additions & 21 deletions
@@ -62,6 +62,7 @@
     'im2sequence',
     'nce',
     'beam_search',
+    'row_conv',
 ]
(In the embedding and dynamic_gru hunks below, each removed/added line pair is textually identical: these hunks only strip trailing whitespace from the docstrings.)

@@ -193,7 +194,7 @@ def embedding(input,
     """
     **Embedding Layer**
 
-    This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
+    This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
     a lookup table. The result of this lookup is the embedding of each ID in the
     :attr:`input`.
 
@@ -208,8 +209,8 @@ def embedding(input,
         is_sparse(bool): The flag indicating whether to use sparse update.
         padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
             Otherwise the given :attr:`padding_idx` indicates padding the output
-            with zeros whenever lookup encounters it in :attr:`input`. If
-            :math:`padding_idx < 0`, the padding_idx to use in lookup is
+            with zeros whenever lookup encounters it in :attr:`input`. If
+            :math:`padding_idx < 0`, the padding_idx to use in lookup is
             :math:`size[0] + dim`.
         param_attr(ParamAttr): Parameters for this layer
         dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
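The padding_idx semantics quoted above are easy to misread for negative values: :math:`size[0] + dim` means the vocabulary size (the first dimension of the embedding table) plus the negative index. A minimal sketch of that resolution; resolve_padding_idx is a hypothetical helper for illustration, not part of the layer API:

    def resolve_padding_idx(padding_idx, vocab_size):
        # Hypothetical helper: map a possibly-negative padding_idx to the
        # absolute embedding-table row whose lookups are zero-padded.
        if padding_idx is None:
            return None                      # padding disabled entirely
        if padding_idx < 0:
            return vocab_size + padding_idx  # e.g. -1 -> last row
        return padding_idx

    assert resolve_padding_idx(-1, 10000) == 9999
    assert resolve_padding_idx(5, 10000) == 5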
@@ -396,9 +397,9 @@ def dynamic_gru(input,
     """
     **Dynamic GRU Layer**
 
-    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
+    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
     Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
-
+
     The formula is as follows:
 
     .. math::

@@ -408,47 +409,47 @@ def dynamic_gru(input,
         r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)
 
         \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
-
+
         h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}
-
+
     The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
-    is the update gate and reset gate activation function and :math:`sigmoid`
-    is usually used for it. :math:`act_c` is the activation function for
+    is the update gate and reset gate activation function and :math:`sigmoid`
+    is usually used for it. :math:`act_c` is the activation function for
     candidate hidden state and :math:`tanh` is usually used for it.
 
     Note that these :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on
     the input :math:`x_{t}` are NOT included in this operator. Users can choose
-    to use fully-connect layer before GRU layer.
+    to use fully-connect layer before GRU layer.
 
     Args:
-        input(Variable): The input of dynamic_gru layer, which supports
-            variable-time length input sequence. The underlying tensor in this
+        input(Variable): The input of dynamic_gru layer, which supports
+            variable-time length input sequence. The underlying tensor in this
             Variable is a matrix with shape :math:`(T \\times 3D)`, where
-            :math:`T` is the total time steps in this mini-batch, :math:`D`
+            :math:`T` is the total time steps in this mini-batch, :math:`D`
             is the hidden size.
         size(int): The dimension of the gru cell.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
             hidden-hidden weight matrix. Note:
 
-            - The shape of the weight matrix is :math:`(T \\times 3D)`, where
+            - The shape of the weight matrix is :math:`(T \\times 3D)`, where
              :math:`D` is the hidden size.
-            - All elements in the weight matrix can be divided into two parts.
+            - All elements in the weight matrix can be divided into two parts.
              The first part are weights of the update gate and reset gate with
-              shape :math:`(D \\times 2D)`, and the second part are weights for
+              shape :math:`(D \\times 2D)`, and the second part are weights for
              candidate hidden state with shape :math:`(D \\times D)`.
-        bias_attr(ParamAttr): The parameter attribute for learnable the
+        bias_attr(ParamAttr): The parameter attribute for learnable the
            hidden-hidden bias.
-        is_reverse(bool): Whether to compute reversed GRU, default
+        is_reverse(bool): Whether to compute reversed GRU, default
             :attr:`False`.
         gate_activation(str): The activation for update gate and reset gate.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
-        activation(str): The activation for candidate hidden state.
+        activation(str): The activation for candidate hidden state.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".
 
     Returns:
         Variable: The hidden state of GRU. The shape is (T \\times D), and lod \
             is the same with the input.
-
+
     Examples:
         .. code-block:: python
 
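Taken together, the four formulas above fully determine one recurrence step. As a sanity check, here is a NumPy sketch of a single step under the documented defaults (act_g = sigmoid, act_c = tanh); it assumes, as the docstring requires, that the input projections W_{ux}x_t, W_{rx}x_t, W_{cx}x_t arrive pre-computed and concatenated into one length-3D vector, and the name gru_step is illustrative only:

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def gru_step(x_proj, h_prev, W_uh, W_rh, W_ch, b_u, b_r, b_c):
        # x_proj: pre-computed input projections, shape (3D,)
        # h_prev: previous hidden state, shape (D,)
        # W_*h:   hidden-hidden weights, shape (D, D); b_*: biases, shape (D,)
        D = h_prev.shape[0]
        ux, rx, cx = x_proj[:D], x_proj[D:2 * D], x_proj[2 * D:]
        u = sigmoid(ux + W_uh.dot(h_prev) + b_u)      # update gate u_t
        r = sigmoid(rx + W_rh.dot(h_prev) + b_r)      # reset gate r_t
        c = np.tanh(cx + W_ch.dot(r * h_prev) + b_c)  # candidate \tilde{h_t}
        return (1.0 - u) * h_prev + u * c             # new hidden state h_t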
@@ -2564,3 +2565,56 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
             'paddings': padding,
         })
     return out
+
+
+def row_conv(input, future_context_size, param_attr=None, act=None):
+    """Row Conv Operator. This layer applies lookahead convolution to
+    **input**. The input variable should be a 2D LoDTensor with shape [T, D].
+    Parameters with shape [future_context_size + 1, D] will be created. The
+    equation of row convolution is as follows:
+
+    .. math::
+        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
+
+    In the above equation:
+
+    * :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
+    * :math:`\\tau`: Future context size.
+    * :math:`X_{j}`: The j-th row of input variable with shape [1, D].
+    * :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
+
+    For more details about row_conv, please refer to the paper \
+    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
+    the design document \
+    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
+
+    Args:
+        input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
+        future_context_size (int): Future context size. Please note, the shape
+            of the convolution kernel is [future_context_size + 1, D].
+        param_attr (ParamAttr): Attributes of parameters, including
+            name, initializer etc.
+        act (str): Non-linear activation to be applied to the output variable.
+
+    Returns:
+        Variable: The output tensor with the same shape as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[16],
+                                  dtype='float32', lod_level=1)
+            out = fluid.layers.row_conv(input=x, future_context_size=2)
+    """
+    helper = LayerHelper('row_conv', **locals())
+    dtype = helper.input_dtype()
+    filter_shape = [future_context_size + 1, input.shape[1]]
+    filter_param = helper.create_parameter(
+        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+    out = helper.create_tmp_variable(dtype)
+    helper.append_op(
+        type='row_conv',
+        inputs={'X': [input],
+                'Filter': [filter_param]},
+        outputs={'Out': [out]})
+    return helper.append_activation(out)
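Reading the docstring equation concretely: each output row i mixes the current row and the next :math:`\tau` rows of the input, each scaled element-wise by one row of the kernel. Below is a NumPy sketch of that computation for a single sequence, not the operator's implementation: it indexes the kernel row for the pair (i, j) as W[j - i], consistent with the [future_context_size + 1, D] kernel shape, and clips the window at the end of the input:

    import numpy as np

    def row_conv_ref(X, W):
        # X: one sequence, shape (T, D); W: kernel, shape (tau + 1, D)
        T, _ = X.shape
        tau = W.shape[0] - 1                    # future context size
        out = np.zeros_like(X)
        for i in range(T):
            for j in range(i, min(i + tau + 1, T)):
                out[i] += X[j] * W[j - i]       # element-wise row product
        return out

    X = np.random.rand(5, 16).astype('float32')
    W = np.random.rand(3, 16).astype('float32')  # future_context_size = 2
    print(row_conv_ref(X, W).shape)              # (5, 16): same shape as input

On a batched LoDTensor the operator applies this per sequence, so the lookahead never crosses sequence boundaries; that is why the wrapper requires lod_level=1 input.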

python/paddle/v2/fluid/tests/test_layers.py

Lines changed: 8 additions & 0 deletions
@@ -271,6 +271,14 @@ def test_nce(self):
         self.assertIsNotNone(avg_loss)
         print(str(default_main_program()))
 
+    def test_row_conv(self):
+        program = Program()
+        with program_guard(program):
+            x = layers.data(name='x', shape=[16], dtype='float32', lod_level=1)
+            out = layers.row_conv(input=x, future_context_size=2)
+            self.assertIsNotNone(out)
+        print(str(program))
+
 
 if __name__ == '__main__':
     unittest.main()
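The test only checks that the layer builds into a program. A hypothetical end-to-end run is sketched below, assuming the fluid API of this commit's era (core.LoDTensor with set/set_lod, and Executor.run feeding LoDTensors and returning numpy arrays); treat it as a usage outline rather than a verified recipe:

    import numpy as np
    import paddle.v2.fluid as fluid
    import paddle.v2.fluid.core as core

    # Same tiny network as test_row_conv, built in the default main program.
    x = fluid.layers.data(name='x', shape=[16], dtype='float32', lod_level=1)
    out = fluid.layers.row_conv(input=x, future_context_size=2)

    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Two sequences of 3 and 5 steps packed into one [8, 16] tensor;
    # the LoD [[0, 3, 8]] records the sequence boundaries.
    data = np.random.rand(8, 16).astype('float32')
    tensor = core.LoDTensor()
    tensor.set(data, place)
    tensor.set_lod([[0, 3, 8]])

    result = exe.run(fluid.default_main_program(),
                     feed={'x': tensor},
                     fetch_list=[out])
    print(result[0].shape)  # expected (8, 16): same shape as the input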
