     'im2sequence',
     'nce',
     'beam_search',
+    'row_conv',
 ]


@@ -193,7 +194,7 @@ def embedding(input,
     """
     **Embedding Layer**

-    This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in 
+    This layer is used to lookup embeddings of IDs, provided by :attr:`input`, in
     a lookup table. The result of this lookup is the embedding of each ID in the
     :attr:`input`.

@@ -208,8 +209,8 @@ def embedding(input,
         is_sparse(bool): The flag indicating whether to use sparse update.
         padding_idx(int|long|None): If :attr:`None`, it makes no effect to lookup.
             Otherwise the given :attr:`padding_idx` indicates padding the output
-            with zeros whenever lookup encounters it in :attr:`input`. If 
-            :math:`padding_idx < 0`, the padding_idx to use in lookup is 
+            with zeros whenever lookup encounters it in :attr:`input`. If
+            :math:`padding_idx < 0`, the padding_idx to use in lookup is
             :math:`size[0] + dim`.
         param_attr(ParamAttr): Parameters for this layer
         dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
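For the `padding_idx` behaviour documented in the hunk above, a minimal usage sketch; the data-layer name, vocabulary size, and embedding width are illustrative assumptions, not values taken from this change:

.. code-block:: python

    # Hypothetical example: IDs whose value equals the resolved padding_idx
    # are looked up as all-zero embedding rows.
    ids = fluid.layers.data(name='ids', shape=[1], dtype='int64', lod_level=1)
    emb = fluid.layers.embedding(
        input=ids,
        size=[10000, 128],      # [vocab_size, embedding_dim], illustrative
        is_sparse=True,
        padding_idx=-1)         # negative values are resolved against size[0]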
@@ -396,9 +397,9 @@ def dynamic_gru(input,
     """
     **Dynamic GRU Layer**

-    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on 
+    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on
     Sequence Modeling <https://arxiv.org/abs/1412.3555>`_
-    
+
     The formula is as follows:

     .. math::
@@ -408,47 +409,47 @@ def dynamic_gru(input,
         r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)

         \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)
-    
+
         h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t}
-    
+
     The :math:`\odot` is the element-wise product of the vectors. :math:`act_g`
-    is the update gate and reset gate activation function and :math:`sigmoid` 
-    is usually used for it. :math:`act_c` is the activation function for 
+    is the update gate and reset gate activation function and :math:`sigmoid`
+    is usually used for it. :math:`act_c` is the activation function for
     candidate hidden state and :math:`tanh` is usually used for it.

     Note that these :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on
     the input :math:`x_{t}` are NOT included in this operator. Users can choose
-    to use fully-connect layer before GRU layer. 
+    to use a fully-connected layer before the GRU layer.

     Args:
-        input(Variable): The input of dynamic_gru layer, which supports 
-            variable-time length input sequence. The underlying tensor in this 
+        input(Variable): The input of dynamic_gru layer, which supports
+            variable-time length input sequence. The underlying tensor in this
             Variable is a matrix with shape :math:`(T \\times 3D)`, where
-            :math:`T` is the total time steps in this mini-batch, :math:`D` 
+            :math:`T` is the total time steps in this mini-batch, :math:`D`
             is the hidden size.
         size(int): The dimension of the gru cell.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable 
+        param_attr(ParamAttr|None): The parameter attribute for the learnable
             hidden-hidden weight matrix. Note:

-            - The shape of the weight matrix is :math:`(T \\times 3D)`, where 
+            - The shape of the weight matrix is :math:`(D \\times 3D)`, where
              :math:`D` is the hidden size.
-            - All elements in the weight matrix can be divided into two parts. 
+            - All elements in the weight matrix can be divided into two parts.
              The first part are weights of the update gate and reset gate with
-              shape :math:`(D \\times 2D)`, and the second part are weights for 
+              shape :math:`(D \\times 2D)`, and the second part are weights for
              candidate hidden state with shape :math:`(D \\times D)`.
-        bias_attr(ParamAttr): The parameter attribute for learnable the 
+        bias_attr(ParamAttr): The parameter attribute for the learnable
             hidden-hidden bias.
-        is_reverse(bool): Whether to compute reversed GRU, default 
+        is_reverse(bool): Whether to compute reversed GRU, default
             :attr:`False`.
         gate_activation(str): The activation for update gate and reset gate.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid".
-        activation(str): The activation for candidate hidden state. 
+        activation(str): The activation for candidate hidden state.
             Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh".

     Returns:
         Variable: The hidden state of GRU. The shape is (T \\times D), and lod \
             is the same with the input.
-            
+
     Examples:
         .. code-block:: python

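Since the :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` projections live outside the operator, a typical call site pairs `dynamic_gru` with a preceding fc layer. A minimal sketch of that convention; the layer names and sizes are illustrative assumptions rather than values from this change:

.. code-block:: python

    hidden_dim = 512
    words = fluid.layers.data(name='words', shape=[1], dtype='int64', lod_level=1)
    emb = fluid.layers.embedding(input=words, size=[10000, 256])
    # The fc layer supplies the 3D-wide input (update, reset and candidate
    # projections) that dynamic_gru expects.
    proj = fluid.layers.fc(input=emb, size=hidden_dim * 3)
    hidden = fluid.layers.dynamic_gru(input=proj, size=hidden_dim)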
@@ -2564,3 +2565,56 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
             'paddings': padding,
         })
     return out
+
+
+def row_conv(input, future_context_size, param_attr=None, act=None):
+    """Row Conv Operator. This layer will apply lookahead convolution to
+    **input**. The input variable should be a 2D LoDTensor with shape [T, D].
+    Parameters with shape [future_context_size + 1, D] will be created. The math
+    equation of row convolution is as follows:
+
+    .. math::
+        Out_{i} = \sum_{j = i} ^ {i + \\tau} X_{j} \odot W_{i - j}
+
+    In the above equation:
+
+    * :math:`Out_{i}`: The i-th row of output variable with shape [1, D].
+    * :math:`\\tau`: Future context size.
+    * :math:`X_{j}`: The j-th row of input variable with shape [1, D].
+    * :math:`W_{i-j}`: The (i-j)-th row of parameters with shape [1, D].
+
+    For more details about row_conv, please refer to the paper \
+    (http://www.cs.cmu.edu/~dyogatam/papers/wang+etal.iclrworkshop2016.pdf) and
+    the design document \
+    (https://github.com/PaddlePaddle/Paddle/issues/2228#issuecomment-303903645).
+
+    Args:
+        input (Variable): Input variable, a 2D LoDTensor with shape [T, D].
+        future_context_size (int): Future context size. Please note that the
+            shape of the convolution kernel is [future_context_size + 1, D].
+        param_attr (ParamAttr): Attributes of parameters, including
+            name, initializer etc.
+        act (str): Non-linear activation to be applied to output variable.
+
+    Returns:
+        Variable: The output tensor with the same shape as the input tensor.
+
+    Examples:
+        .. code-block:: python
+
+            x = fluid.layers.data(name='x', shape=[16],
+                                  dtype='float32', lod_level=1)
+            out = fluid.layers.row_conv(input=x, future_context_size=2)
+    """
+    helper = LayerHelper('row_conv', **locals())
+    dtype = helper.input_dtype()
+    filter_shape = [future_context_size + 1, input.shape[1]]
+    filter_param = helper.create_parameter(
+        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+    out = helper.create_tmp_variable(dtype)
+    helper.append_op(
+        type='row_conv',
+        inputs={'X': [input],
+                'Filter': [filter_param]},
+        outputs={'Out': [out]})
+    return helper.append_activation(out)
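As a shape check on the lookahead equation in the docstring above, a NumPy reference sketch (not part of this change). It treats the input as a single sequence rather than a batched LoDTensor, clamps the window at the end of the sequence, and indexes the filter by the lookahead offset, which is how :math:`W_{i-j}` is read here:

.. code-block:: python

    import numpy as np

    def row_conv_ref(x, w):
        # x: [T, D] input rows; w: [future_context_size + 1, D] filter.
        T, _ = x.shape
        tau = w.shape[0] - 1
        out = np.zeros_like(x)
        for i in range(T):
            # Sum over the current row and up to `tau` future rows, clamped at
            # the end of the sequence; each term is an element-wise product.
            for j in range(i, min(i + tau, T - 1) + 1):
                out[i] += x[j] * w[j - i]
        return out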