/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <string>
#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class CudnnLSTMOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

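  // Shape sketch for the forward pass (the sizes below are purely
  // illustrative; only the rank-3 requirement on Input is enforced here):
  //   Input : (seq_len, batch_size, input_size),     e.g. (20, 32, 128)
  //   InitH : (num_layers, batch_size, hidden_size), e.g. (1, 32, 256)
  //   InitC : same shape as InitH
  // InferShape copies Input's dims to Out and InitH/InitC's dims to
  // last_h/last_c.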
  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(Input) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) of LSTM should not be null.");

    PADDLE_ENFORCE(ctx->HasInput("InitH"),
                   "Input(InitH) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("InitC"),
                   "Input(InitC) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("Cache"),
                   "Input(Cache) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("last_h"),
                   "Output(last_h) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("last_c"),
                   "Output(last_c) of LSTM should not be null.");

    auto in_dims = ctx->GetInputDim("Input");
    PADDLE_ENFORCE_EQ(in_dims.size(), 3, "Input(Input)'s rank must be 3.");

    ctx->SetOutputDim("Out", ctx->GetInputDim("Input"));
    ctx->SetOutputDim("last_h", ctx->GetInputDim("InitH"));
    ctx->SetOutputDim("last_c", ctx->GetInputDim("InitC"));
  }
};

class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput(
        "Input",
        "(Tensor) RNN input tensor, which supports variable-length input "
        "sequences. The shape of the Tensor MUST be "
        "(seq_len x batch_size x input_size). "
        "seq_len is the number of time steps in this mini-batch (it CAN "
        "change between batches), "
        "batch_size is the number of instances in this batch, and "
        "input_size is the feature size of the input; "
        "input_size and the hidden_size of the LSTM may differ.");
    AddInput("InitH",
             "(Tensor) the initial hidden state of the LSTM. "
             "This is a tensor with shape (num_layers x batch_size x "
             "hidden_size); "
             "when is_bidirec is True, the shape will be (num_layers*2 x "
             "batch_size x hidden_size).");
    AddInput("InitC",
             "(Tensor) the initial cell state of the LSTM. "
             "This is a tensor with shape (num_layers x batch_size x "
             "hidden_size); "
             "when is_bidirec is True, the shape will be (num_layers*2 x "
             "batch_size x hidden_size).");
    AddInput("W",
             "(Tensor) the learnable weights of the LSTM. "
             "The shape is (N), where N is the total weight size of the LSTM; "
             "cuDNN concatenates all the weights into one Tensor.");
    AddInput("Cache",
             "The cache of the dropout op, a RAW type variable including "
             "random number generator states and some descriptors, which is "
             "used in the cuDNN kernel.")
        .AsDispensable();
    AddOutput("Out",
              "(Tensor) the output hidden state of the LSTM operator. "
              "The shape is (seq_len x batch_size x hidden_size) if "
              "is_bidirec is False; "
              "when is_bidirec is True, the shape will be (seq_len x "
              "batch_size x hidden_size*2).");
    AddOutput("last_h",
              "(Tensor) the hidden state of the last step. "
              "The shape is (num_layers x batch_size x hidden_size) if "
              "is_bidirec is False; "
              "when is_bidirec is True, the shape will be (num_layers*2 x "
              "batch_size x hidden_size).");
    AddOutput("last_c",
              "(Tensor) the cell state of the last step. "
              "The shape is (num_layers x batch_size x hidden_size) if "
              "is_bidirec is False; "
              "when is_bidirec is True, the shape will be (num_layers*2 x "
              "batch_size x hidden_size).");
    AddAttr<int>("max_len",
                 "max length of the LSTM op; "
                 "the first dim of the Input can NOT be greater than max_len")
        .SetDefault(20);
    AddAttr<float>(
        "dropout_prob",
        "dropout probability of the dropout op; "
        "dropout works ONLY between LSTM layers, not between time steps. "
        "No dropout is applied to the Out tensor.")
        .SetDefault(0.0);
    AddAttr<bool>("is_bidirec",
                  "whether the LSTM is bidirectional; "
                  "this affects the shape of Out, last_h, and last_c")
        .SetDefault(false);
    AddAttr<int>("input_size", "input size of the Input Tensor").SetDefault(10);
    AddAttr<int>("hidden_size", "hidden size of the LSTM").SetDefault(100);
    AddAttr<int>("num_layers", "the total layer number of the LSTM")
        .SetDefault(1);
    AddAttr<bool>("is_test", "True if in test phase.").SetDefault(false);
    AddAttr<int>("seed", "seed to use if fix_seed is True").SetDefault(-1);
    AddComment(R"DOC(
CUDNN LSTM implementation

A four-gate Long Short-Term Memory network with no peephole connections.
In the forward pass, the output $h_t$ and the cell state $c_t$ for a given iteration
can be computed from the recurrent input $h_{t-1}$, the cell input $c_{t-1}$ and the
previous layer input $x_t$, given the input weights, recurrent weights and biases,
by the following equations:

$$ i_t = sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i) $$

$$ f_t = sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f) $$

$$ o_t = sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o) $$

$$ \\tilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c) $$

$$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$

$$ h_t = o_t \\odot tanh(c_t) $$

- W terms denote weight matrices (e.g. $W_{ix}$ is the matrix
  of weights from the input $x_t$ to the input gate).
- The b terms denote bias vectors (e.g. $bx_i$ and $bh_i$ are the input gate bias vectors).
- sigmoid is the logistic sigmoid function.
- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
  and cell activation vectors, respectively, all of which have the same size as
  the cell output activation vector $h$.
- $\odot$ is the element-wise product of the vectors.
- `tanh` is the hyperbolic tangent activation function.
- $\tilde{c_t}$ is also called the candidate cell state,
  which is computed from the current input and the previous hidden state.

Here sigmoid is the logistic sigmoid: $sigmoid(x) = 1 / (1 + e^{-x})$; $\odot$ denotes
element-wise (point-wise) multiplication, and the products with the weight matrices are
matrix multiplications.

)DOC");
  }
};

class CudnnLSTMGradOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("Input"),
                   "Input(Input) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("W"), "Input(W) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("last_h"),
                   "Input(last_h) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("last_c"),
                   "Input(last_c) of LSTM should not be null.");

    PADDLE_ENFORCE(ctx->HasInput("Cache"),
                   "Input(Cache) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("InitH"),
                   "Input(InitH) of LSTM should not be null.");
    PADDLE_ENFORCE(ctx->HasInput("InitC"),
                   "Input(InitC) of LSTM should not be null.");

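    // Helper: if the gradient of `name` is requested as an output, give it
    // the same shape as the corresponding forward input.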
    auto SetOutGradDim = [&ctx](const std::string& name) {
      auto g_name = framework::GradVarName(name);
      if (ctx->HasOutput(g_name)) {
        ctx->SetOutputDim(g_name, ctx->GetInputDim(name));
      }
    };

    SetOutGradDim("Input");
    SetOutGradDim("W");
    SetOutGradDim("InitH");
    SetOutGradDim("InitC");
  }
};

// Fallback CPU kernel: the cuDNN LSTM currently has no CPU implementation,
// so running it on CPU only raises an error.
template <typename T>
class NotImpleKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_THROW(
        "CPU is not supported for this kernel now. It will be added in the "
        "future.");
  }
};

}  // namespace operators
}  // namespace paddle

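// The forward op is registered with DefaultGradOpDescMaker<true>, which
// generates the cudnn_lstm_grad op description from the forward op's inputs
// and outputs; CudnnLSTMGradOp::InferShape above consumes them.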
namespace ops = paddle::operators;
REGISTER_OPERATOR(cudnn_lstm, ops::CudnnLSTMOp, ops::CudnnLSTMOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(cudnn_lstm_grad, ops::CudnnLSTMGradOp);

REGISTER_OP_CPU_KERNEL(cudnn_lstm, ops::NotImpleKernel<float>);
REGISTER_OP_CPU_KERNEL(cudnn_lstm_grad, ops::NotImpleKernel<float>);