
Commit cc441ee

Add cudnn lstm
test=release/1.2

1 parent fa6c2b5 commit cc441ee

File tree

  paddle/fluid/API.spec
  paddle/fluid/platform/dynload/cudnn.h
  python/paddle/fluid/layers/nn.py
  python/paddle/fluid/tests/unittests/op_test.py
  python/paddle/fluid/tests/unittests/testsuite.py

5 files changed: +207 additions, -4 deletions


paddle/fluid/API.spec

Lines changed: 1 addition & 0 deletions

@@ -194,6 +194,7 @@ paddle.fluid.layers.grid_sampler ArgSpec(args=['x', 'grid', 'name'], varargs=Non
 paddle.fluid.layers.log_loss ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None))
 paddle.fluid.layers.add_position_encoding ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,))
 paddle.fluid.layers.bilinear_tensor_product ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None))
+paddle.fluid.layers.lstm ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
 paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)

paddle/fluid/platform/dynload/cudnn.h

Lines changed: 17 additions & 1 deletion

@@ -111,7 +111,23 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
   __macro(cudnnFindConvolutionForwardAlgorithmEx);        \
   __macro(cudnnFindConvolutionBackwardFilterAlgorithmEx); \
   __macro(cudnnFindConvolutionBackwardDataAlgorithmEx);   \
-  __macro(cudnnGetErrorString);
+  __macro(cudnnGetErrorString);                           \
+  __macro(cudnnCreateDropoutDescriptor);                  \
+  __macro(cudnnDropoutGetStatesSize);                     \
+  __macro(cudnnSetDropoutDescriptor);                     \
+  __macro(cudnnCreateRNNDescriptor);                      \
+  __macro(cudnnSetRNNDescriptor);                         \
+  __macro(cudnnGetRNNParamsSize);                         \
+  __macro(cudnnGetRNNWorkspaceSize);                      \
+  __macro(cudnnGetRNNTrainingReserveSize);                \
+  __macro(cudnnRNNForwardTraining);                       \
+  __macro(cudnnRNNBackwardData);                          \
+  __macro(cudnnRNNBackwardWeights);                       \
+  __macro(cudnnRNNForwardInference);                      \
+  __macro(cudnnDestroyDropoutDescriptor);                 \
+  __macro(cudnnDestroyRNNDescriptor);                     \
+  __macro(cudnnSetRNNDescriptor_v6);
+
 CUDNN_DNN_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_CUDNN_WRAP)

 #define CUDNN_DNN_ROUTINE_EACH_R2(__macro) \

python/paddle/fluid/layers/nn.py

Lines changed: 163 additions & 0 deletions

@@ -169,6 +169,7 @@
     'log_loss',
     'add_position_encoding',
     'bilinear_tensor_product',
+    'lstm',
 ]


@@ -472,6 +473,168 @@ def dynamic_lstm(input,
     return hidden, cell


+def lstm(input,
+         init_h,
+         init_c,
+         max_len,
+         hidden_size,
+         num_layers,
+         dropout_prob=0.0,
+         is_bidirec=False,
+         is_test=False,
+         name=None,
+         default_initializer=None,
+         seed=-1):
+    """
+    If the device is a GPU, this op will use the cuDNN LSTM implementation.
+
+    A four-gate Long Short-Term Memory network with no peephole connections.
+    In the forward pass the output h_t and cell state c_t for a given iteration
+    can be computed from the recurrent input h_{t-1}, the cell input c_{t-1} and
+    the previous layer input x_t, given matrices W, R and biases bW, bR, from
+    the following equations:
+
+    $$ i_t = \\sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i) $$
+
+    $$ f_t = \\sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f) $$
+
+    $$ o_t = \\sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o) $$
+
+    $$ \\tilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c) $$
+
+    $$ c_t = f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t} $$
+
+    $$ h_t = o_t \\odot tanh(c_t) $$
+
+    - W terms denote weight matrices (e.g. $W_{ix}$ is the matrix
+      of weights from the input gate to the input).
+    - The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vectors).
+    - sigmoid is the logistic sigmoid function.
+    - $i, f, o$ and $c$ are the input gate, forget gate, output gate,
+      and cell activation vectors, respectively, all of which have the same size as
+      the cell output activation vector $h$.
+    - $\odot$ is the element-wise product of the vectors.
+    - `tanh` is the activation function.
+    - $\tilde{c_t}$ is also called the candidate hidden state,
+      which is computed based on the current input and the previous hidden state.
+
+    Here sigmoid is the sigmoid operator: sigmoid(x) = 1 / (1 + e^-x), * represents
+    point-wise multiplication, and X represents matrix multiplication.
+
+
+    Args:
+        input (Variable): LSTM input tensor, shape MUST be ( seq_len x batch_size x input_size ).
+        init_h(Variable): The initial hidden state of the LSTM.
+                       This is a tensor with shape ( num_layers x batch_size x hidden_size );
+                       if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size ).
+        init_c(Variable): The initial cell state of the LSTM.
+                       This is a tensor with shape ( num_layers x batch_size x hidden_size );
+                       if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size ).
+        max_len (int): max length of the LSTM. The first dim of the input tensor CAN NOT be greater than max_len.
+        hidden_size (int): hidden size of the LSTM.
+        num_layers (int): total number of layers of the LSTM.
+        dropout_prob(float|0.0): dropout probability. Dropout ONLY works between RNN layers, NOT between time steps.
+                       There is NO dropout applied to the output of the last RNN layer.
+        is_bidirec (bool): If it is bidirectional.
+        is_test (bool): If it is in test phase.
+        name (str|None): A name for this layer (optional). If set to None, the layer
+                       will be named automatically.
+        default_initializer(Initializer|None): The initializer used to initialize the weight.
+                       If set to None, the default initializer will be used.
+        seed(int): Seed for dropout in the LSTM. If it is -1, dropout will use a random seed.
+
+
+    Returns:
+        rnn_out(Tensor): result of LSTM hidden, shape is ( seq_len x batch_size x hidden_size );
+                         if is_bidirec is set to True, shape will be ( seq_len x batch_size x hidden_size*2 ).
+        last_h(Tensor): the hidden state of the last step of the LSTM,
+                        shape is ( num_layers x batch_size x hidden_size );
+                        if is_bidirec is set to True, shape will be ( num_layers*2 x batch_size x hidden_size ).
+        last_c(Tensor): the cell state of the last step of the LSTM,
+                        shape is ( num_layers x batch_size x hidden_size );
+                        if is_bidirec is set to True, shape will be ( num_layers*2 x batch_size x hidden_size ).
+
+
+    Examples:
+        .. code-block:: python
+
+            input = embedding
+            batch_size = 20
+            max_len = 100
+            dropout_prob = 0.2
+            input_size = 100
+            hidden_size = 150
+            num_layers = 1
+            init_h = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 )
+            init_c = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 )
+
+            rnn_out, last_h, last_c = layers.lstm( input, init_h, init_c, \
+                    max_len, hidden_size, num_layers, \
+                    dropout_prob=dropout_prob)
+    """
+
+    helper = LayerHelper('cudnn_lstm', **locals())
+
+    dtype = input.dtype
+    input_shape = list(input.shape)
+    input_size = input_shape[-1]
+    weight_size = 0
+    for i in range(num_layers):
+        if i == 0:
+            input_weight_size = (input_size * hidden_size) * 4
+        else:
+            if is_bidirec:
+                input_weight_size = (hidden_size * 2 * hidden_size) * 4
+            else:
+                input_weight_size = (hidden_size * hidden_size) * 4
+
+        hidden_weight_size = (hidden_size * hidden_size) * 4
+
+        if is_bidirec:
+            weight_size += (input_weight_size + hidden_weight_size) * 2
+            weight_size += hidden_size * 8 * 2
+        else:
+            weight_size += input_weight_size + hidden_weight_size
+            weight_size += hidden_size * 8
+
+    weight = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[weight_size],
+        dtype=dtype,
+        default_initializer=default_initializer)
+
+    out = helper.create_variable_for_type_inference(dtype)
+    last_h = helper.create_variable_for_type_inference(dtype)
+    last_c = helper.create_variable_for_type_inference(dtype)
+
+    cache = helper.create_variable(
+        persistable=True, type=core.VarDesc.VarType.RAW, stop_gradient=True)
+
+    helper.append_op(
+        type='cudnn_lstm',
+        inputs={
+            'Input': input,
+            'InitH': init_h,
+            'InitC': init_c,
+            'W': weight,
+            'Cache': cache,
+        },
+        outputs={
+            'Out': out,
+            'last_h': last_h,
+            'last_c': last_c,
+        },
+        attrs={
+            'max_len': max_len,
+            'is_bidirec': is_bidirec,
+            'input_size': input_size,
+            'hidden_size': hidden_size,
+            'num_layers': num_layers,
+            'is_test': is_test,
+            'dropout_prob': dropout_prob,
+            'seed': seed,
+        })
+    return out, last_h, last_c
+
+
 def dynamic_lstmp(input,
                   size,
                   proj_size,

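The flat weight created by helper.create_parameter above packs every cuDNN LSTM gate weight and bias into a single 1-D parameter. The following is a quick, standalone arithmetic sketch of that size calculation, using the numbers from the docstring example (plain Python, not part of this commit):

    # Standalone check of the weight_size loop above for num_layers=1,
    # input_size=100, hidden_size=150, is_bidirec=False (illustrative values).
    input_size, hidden_size = 100, 150

    input_weight_size = (input_size * hidden_size) * 4    # W_{ix}, W_{fx}, W_{cx}, W_{ox}
    hidden_weight_size = (hidden_size * hidden_size) * 4  # W_{ih}, W_{fh}, W_{ch}, W_{oh}
    bias_size = hidden_size * 8                           # bx_* and bh_* for the 4 gates

    weight_size = input_weight_size + hidden_weight_size + bias_size
    print(weight_size)  # 60000 + 90000 + 1200 = 151200 values in one flat tensor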
python/paddle/fluid/tests/unittests/op_test.py

Lines changed: 20 additions & 2 deletions

@@ -216,6 +216,15 @@ def _append_ops(self, block):
                                             self.dtype)
         outputs = append_input_output(block, op_proto, self.outputs, False,
                                       self.dtype)
+
+        if hasattr(self, "cache_name_list"):
+            for name in self.cache_name_list:
+                inputs[name] = block.create_var(
+                    name=name,
+                    persistable=True,
+                    type=core.VarDesc.VarType.RAW,
+                    stop_gradient=True)
+
         op = block.append_op(
             type=self.op_type,
             inputs=inputs,
@@ -428,8 +437,17 @@ def check_grad_with_place(self,
         op_inputs = self.inputs if hasattr(self, "inputs") else dict()
         op_outputs = self.outputs if hasattr(self, "outputs") else dict()
         op_attrs = self.attrs if hasattr(self, "attrs") else dict()
-        self.op = create_op(self.scope, self.op_type, op_inputs, op_outputs,
-                            op_attrs)
+
+        cache_list = None
+        if hasattr(self, "cache_name_list"):
+            cache_list = self.cache_name_list
+        self.op = create_op(
+            self.scope,
+            self.op_type,
+            op_inputs,
+            op_outputs,
+            op_attrs,
+            cache_list=cache_list)

         if no_grad_set is None:
             no_grad_set = set()

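With this hook in place, an operator test only needs to define cache_name_list for _append_ops and check_grad_with_place to create a persistable RAW variable per cache name. A minimal, hypothetical sketch of how a test might opt in (the class name and the bare setUp are illustrative assumptions, not taken from this commit):

    from op_test import OpTest  # test utility in python/paddle/fluid/tests/unittests


    class TestCudnnLSTMCacheHook(OpTest):  # hypothetical test class for illustration
        def setUp(self):
            self.op_type = "cudnn_lstm"
            # Names listed here are created as persistable RAW variables by
            # _append_ops / create_op instead of ordinary LoDTensor inputs.
            self.cache_name_list = ["Cache"]
            # A real test would also fill Input/InitH/InitC/W, outputs and attrs;
            # the empty dicts below only show the shape of the hook.
            self.inputs = {}
            self.outputs = {}
            self.attrs = {}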
python/paddle/fluid/tests/unittests/testsuite.py

Lines changed: 6 additions & 1 deletion

@@ -20,7 +20,7 @@
 from paddle.fluid.op import Operator


-def create_op(scope, op_type, inputs, outputs, attrs):
+def create_op(scope, op_type, inputs, outputs, attrs, cache_list=None):
     kwargs = dict()

     op_maker = core.op_proto_and_checker_maker
@@ -43,6 +43,11 @@ def __create_var__(name, var_name):
                    __create_var__(in_name, sub_in_name)
            else:
                __create_var__(in_name, in_name)
+    if cache_list != None and isinstance(cache_list, list):
+        for name in cache_list:
+            kwargs[name] = []
+            scope.var(name)
+            kwargs[name].append(name)

     for out_name, out_dup in Operator.get_op_outputs(op_type):
         if out_name in outputs:

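The new cache_list branch in create_op simply registers each cache name as a scope variable and passes it to the operator by name. Below is a minimal, self-contained sketch of that behaviour; a plain dict stands in for the fluid Scope, and the helper name is an illustrative assumption, not Paddle code:

    def build_cache_kwargs(scope_vars, cache_list=None):
        # Mirrors the new branch in create_op: every cache name becomes a scope
        # variable and is wired into the op's kwargs as a one-element name list.
        kwargs = {}
        if cache_list is not None and isinstance(cache_list, list):
            for name in cache_list:
                kwargs[name] = []
                scope_vars[name] = object()  # stand-in for scope.var(name)
                kwargs[name].append(name)
        return kwargs


    scope_vars = {}
    print(build_cache_kwargs(scope_vars, ["Cache"]))  # {'Cache': ['Cache']}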