|
26 | 26 |     'fc',
|
27 | 27 |     'embedding',
|
28 | 28 |     'dynamic_lstm',
|
| 29 | +     'dynamic_gru', |
29 | 30 |     'gru_unit',
|
30 | 31 |     'linear_chain_crf',
|
31 | 32 |     'crf_decoding',
|
@@ -368,6 +369,113 @@ def dynamic_lstm(input,
|
368 | 369 |     return hidden, cell
|
369 | 370 |
|
370 | 371 |
|
| 372 | +def dynamic_gru(input, |
| 373 | +                size, |
| 374 | +                param_attr=None, |
| 375 | +                bias_attr=None, |
| 376 | +                is_reverse=False, |
| 377 | +                gate_activation='sigmoid', |
| 378 | +                candidate_activation='tanh', |
| 379 | +                h_0=None): |
| 380 | +    """ |
| 381 | +    **Dynamic GRU Layer** |
| 382 | + |
| 383 | +    Refer to `Empirical Evaluation of Gated Recurrent Neural Networks on |
| 384 | +    Sequence Modeling <https://arxiv.org/abs/1412.3555>`_ |
| 385 | + |
| 386 | +    The formula is as follows: |
| 387 | + |
| 388 | +    .. math:: |
| 389 | + |
| 390 | +        u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u) |
| 391 | + |
| 392 | +        r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r) |
| 393 | + |
| 394 | +        \\tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c) |
| 395 | + |
| 396 | +        h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \\tilde{h_t} |
| 397 | + |
| 398 | +    :math:`\odot` denotes the element-wise product of vectors. :math:`act_g` |
| 399 | +    is the activation function for the update and reset gates; :math:`sigmoid` |
| 400 | +    is usually used for it. :math:`act_c` is the activation function for the |
| 401 | +    candidate hidden state; :math:`tanh` is usually used for it. |
| 402 | + |
| 403 | +    Note that the :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` operations on |
| 404 | +    the input :math:`x_{t}` are NOT included in this operator. Users can apply |
| 405 | +    a fully-connected layer before the GRU layer to compute them. |
| 406 | + |
| 407 | +    Args: |
| 408 | +        input(Variable): The input of the dynamic_gru layer, which supports |
| 409 | +            variable-length input sequences. The underlying tensor in this |
| 410 | +            Variable is a matrix with shape :math:`(T \\times 3D)`, where |
| 411 | +            :math:`T` is the total time steps in this mini-batch and :math:`D` |
| 412 | +            is the hidden size. |
| 413 | +        size(int): The dimension of the GRU cell. |
| 414 | +        param_attr(ParamAttr|None): The parameter attribute for the learnable |
| 415 | +            hidden-hidden weight matrix. Note: |
| 416 | + |
| 417 | +            - The shape of the weight matrix is :math:`(D \\times 3D)`, where |
| 418 | +              :math:`D` is the hidden size. |
| 419 | +            - All elements in the weight matrix can be divided into two parts: |
| 420 | +              the weights of the update and reset gates with shape |
| 421 | +              :math:`(D \\times 2D)`, and the weights of the candidate hidden |
| 422 | +              state with shape :math:`(D \\times D)`. |
| 423 | +        bias_attr(ParamAttr): The parameter attribute for the learnable |
| 424 | +            hidden-hidden bias. |
| 425 | +        is_reverse(bool): Whether to compute the reversed GRU, default |
| 426 | +            :attr:`False`. |
| 427 | +        gate_activation(str): The activation for the update gate and reset gate. |
| 428 | +            Choices = ["sigmoid", "tanh", "relu", "identity"], default "sigmoid". |
| 429 | +        candidate_activation(str): The activation for the candidate hidden state. |
| 430 | +            Choices = ["sigmoid", "tanh", "relu", "identity"], default "tanh". |
| 431 | + |
| 432 | +    Returns: |
| 433 | +        Variable: The hidden state of the GRU. The shape is :math:`(T \\times D)`, \ |
| 434 | +            and the LoD is the same as that of the input. |
| 435 | + |
| 436 | +    Examples: |
| 437 | +        .. code-block:: python |
| 438 | + |
| 439 | +            hidden_dim = 512 |
| 440 | +            x = fluid.layers.fc(input=data, size=hidden_dim * 3) |
| 441 | +            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim) |
| 442 | +    """ |
| 443 | + |
| 444 | +    helper = LayerHelper('gru', **locals()) |
| 445 | +    dtype = helper.input_dtype() |
| 446 | + |
| 447 | +    weight = helper.create_parameter( |
| 448 | +        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype) |
| 449 | +    bias = helper.create_parameter( |
| 450 | +        attr=helper.bias_attr, shape=[1, 3 * size], dtype=dtype, is_bias=True) |
| 451 | +    inputs = {'Input': input, 'Weight': weight, 'Bias': bias} |
| 452 | +    if h_0 is not None: |
| 453 | +        assert h_0.shape[1] == size, \ |
| 454 | +            'The second dimension of h0 should be %d' % size |
| 455 | +        inputs['H0'] = h_0 |
| 456 | + |
| 457 | +    hidden = helper.create_tmp_variable(dtype) |
| 458 | +    batch_gate = helper.create_tmp_variable(dtype) |
| 459 | +    batch_reset_hidden_prev = helper.create_tmp_variable(dtype) |
| 460 | +    batch_hidden = helper.create_tmp_variable(dtype) |
| 461 | + |
| 462 | +    helper.append_op( |
| 463 | +        type='gru', |
| 464 | +        inputs=inputs, |
| 465 | +        outputs={ |
| 466 | +            'Hidden': hidden, |
| 467 | +            'BatchGate': batch_gate, |
| 468 | +            'BatchResetHiddenPrev': batch_reset_hidden_prev, |
| 469 | +            'BatchHidden': batch_hidden |
| 470 | +        }, |
| 471 | +        attrs={ |
| 472 | +            'is_reverse': is_reverse, |
| 473 | +            'gate_activation': gate_activation, |
| 474 | +            'activation': candidate_activation |
| 475 | +        }) |
| 476 | +    return hidden |
| 477 | + |
| 478 | + |
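For reviewers: the recurrence documented in the docstring can be sanity-checked against a small NumPy sketch of a single GRU step. This is only an illustrative reference, not the gru operator's actual kernel; the names (gru_step, x_proj, w_uh, ...) are invented for the example, and it assumes the three input projections have already been applied by a preceding fc layer, as the docstring requires.

    import numpy as np

    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    def gru_step(x_proj, h_prev, w_uh, w_rh, w_ch, b_u, b_r, b_c):
        # x_proj is the 3D-wide fc output holding W_ux x_t, W_rx x_t and
        # W_cx x_t concatenated along the feature axis.
        d = h_prev.shape[-1]
        ux, rx, cx = x_proj[:d], x_proj[d:2 * d], x_proj[2 * d:]
        u = sigmoid(ux + h_prev.dot(w_uh) + b_u)              # update gate u_t
        r = sigmoid(rx + h_prev.dot(w_rh) + b_r)              # reset gate r_t
        h_tilde = np.tanh(cx + (r * h_prev).dot(w_ch) + b_c)  # candidate state
        return (1.0 - u) * h_prev + u * h_tilde               # h_t

Iterating gru_step over the time steps of one sequence (in reversed order when is_reverse is True) reproduces the Hidden output row by row.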
371 | 479 | def gru_unit(input,
|
372 | 480 |              hidden,
|
373 | 481 |              size,
|
|
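A minimal end-to-end usage sketch of the new layer, assuming the usual `import paddle.fluid as fluid`; the data layer name 'sequence' and the feature width 128 are placeholders:

    import paddle.fluid as fluid

    hidden_dim = 512
    # lod_level=1 marks a variable-length sequence input
    data = fluid.layers.data(
        name='sequence', shape=[128], dtype='float32', lod_level=1)
    # Project the input to 3 * hidden_dim first; the W_ux/W_rx/W_cx input
    # projections are not part of the gru op itself.
    x = fluid.layers.fc(input=data, size=hidden_dim * 3)
    hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)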