Commit 7d6dcf9: fix RNN LSTM GRU

1 parent 7e3c77c
File tree: 5 files changed (+25 lines, -82 lines)
examples/basic_tutorials/imdb_LSTM_simple.py

Lines changed: 3 additions & 3 deletions
@@ -4,9 +4,9 @@
 # The same set of code can switch the backend with one line
 import os
 # os.environ['TL_BACKEND'] = 'tensorflow'
-# os.environ['TL_BACKEND'] = 'mindspore'
+os.environ['TL_BACKEND'] = 'mindspore'
 # os.environ['TL_BACKEND'] = 'paddle'
-os.environ['TL_BACKEND'] = 'torch'
+# os.environ['TL_BACKEND'] = 'torch'
 import tensorlayerx as tlx
 from tensorlayerx.nn import Module
 from tensorlayerx.nn import Linear, LSTM, Embedding

@@ -42,7 +42,7 @@ class ImdbNet(Module):
     def __init__(self):
         super(ImdbNet, self).__init__()
         self.embedding = Embedding(num_embeddings=vocab_size, embedding_dim=64)
-        self.lstm = LSTM(input_size=64, hidden_size=64, num_layers=2)
+        self.lstm = LSTM(input_size=64, hidden_size=64)
         self.linear1 = Linear(in_features=64, out_features=64, act=tlx.nn.ReLU)
         self.linear2 = Linear(in_features=64, out_features=2)

tensorlayerx/backend/ops/paddle_nn.py

Lines changed: 13 additions & 41 deletions
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-

 import paddle as pd
+import paddle.nn
 from paddle import framework
 import paddle.nn.functional as F
 import numpy as np

@@ -13,7 +14,7 @@
 from paddle.nn.layer.rnn import RNNCellBase
 import warnings
 import math
-
+from paddle import _C_ops

 def padding_format(padding):
     """

@@ -1503,7 +1504,6 @@ def concat_states(states, bidirectional=False, state_components=1):
         componnets.append(states[i::state_components])
     return tuple([pd.stack(item) for item in componnets])

-
 class rnnbase(LayerList):

     def __init__(

@@ -1539,7 +1539,6 @@ def __init__(
         self.bias = bias
         RNN = pd.nn.RNN
         BiRNN = pd.nn.BiRNN
-
         kwargs = {"weight_ih_attr": None, "weight_hh_attr": None, "bias_ih_attr": self.bias, "bias_hh_attr": self.bias}
         act = None
         rnn_cls = None

@@ -1618,16 +1617,11 @@ def __init__(

    def flatten_parameters(self):
        if self.could_use_cudnn:
-            params = self.parameters(include_sublayers=False)
-            shape = [np.prod(param.shape) for param in params]
-            self._all_weights = [None] * len(params)
-            for i, param in enumerate(params):
-                offset = 0 if i % 4 < 2 else (2 * self.num_layers * self.bidirect)
-                layer_idx = i // 4
-                self._all_weights[offset + layer_idx * 2 + i % 2] = param
+            self._all_weights = self.parameters(include_sublayers=False)
+            shape = [np.prod(param.shape) for param in self._all_weights]
             self._flat_weight = [
                 self.create_parameter(
-                    shape=[np.sum(shape)], dtype=params[0].dtype, default_initializer=I.Constant(0.0)
+                    shape=[np.sum(shape)], dtype=self._all_weights[0].dtype, default_initializer=I.Constant(0.0)
                 )
             ]
             self._dropout_state = self.create_variable(dtype=fluid.core.VarDesc.VarType.UINT8)

@@ -1640,42 +1634,20 @@ def flatten_parameters(self):
                 }, attrs={
                     "copy_data": True,
                     "use_align": False,
-                    "dtype": params[0].dtype
+                    "dtype": self._all_weights[0].dtype
                 }
             )

     def _cudnn_impl(self, inputs, initial_states, sequence_length):
         if not self.time_major:
             inputs = pd.tensor.transpose(inputs, [1, 0, 2])
-        out = self._helper.create_variable_for_type_inference(inputs.dtype)
-        state = [self._helper.create_variable_for_type_inference(inputs.dtype) for i in range(self.state_components)]
-        reserve = self._helper.create_variable_for_type_inference(
-            dtype=fluid.core.VarDesc.VarType.UINT8, stop_gradient=True
-        )
-
-        inputs = {
-            'Input': inputs,
-            'WeightList': self._all_weights,
-            'PreState': initial_states,
-            'SequenceLength': sequence_length
-        }
-        attrs = {
-            'dropout_prob': self.dropout,
-            'is_bidirec': self.bidirect == 2,
-            'input_size': self.input_size,
-            'hidden_size': self.hidden_size,
-            'num_layers': self.num_layers,
-            'mode': self.mode,
-            'is_test': not self.training
-        }
-
-        outputs = {
-            'Out': out,
-            'State': state,
-            'Reserve': reserve,
-            'DropoutState': self._dropout_state,
-        }
-        self._helper.append_op(type="rnn", inputs=inputs, outputs=outputs, attrs=attrs)
+        _, _, out, state = _C_ops.rnn(
+            inputs, initial_states, self._all_weights, sequence_length,
+            self._dropout_state, self.state_components, 'dropout_prob',
+            self.dropout, 'is_bidirec', self.bidirect == 2,
+            'input_size', self.input_size, 'hidden_size', self.hidden_size,
+            'num_layers', self.num_layers, 'mode', self.mode, 'is_test',
+            not self.training)
         out = pd.tensor.transpose(out, [1, 0, 2]) if not self.time_major else out
         return out, tuple(state) if len(state) > 1 else state[0]
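
The flatten_parameters change stops re-ordering the cuDNN weight list by hand and passes parameters() to the new _C_ops.rnn call in its native order. Below is a standalone sketch of what the removed loop computed, using placeholder strings instead of real Paddle tensors; the grouping shown is inferred from the removed code, not from Paddle documentation.

# parameters() yields per-cell groups of (w_ih, w_hh, b_ih, b_hh);
# placeholder strings stand in for the actual weight tensors.
params = ['w_ih_l0', 'w_hh_l0', 'b_ih_l0', 'b_hh_l0',
          'w_ih_l1', 'w_hh_l1', 'b_ih_l1', 'b_hh_l1']
num_layers, bidirect = 2, 1

# Removed logic: put every weight matrix first, then every bias vector.
reordered = [None] * len(params)
for i, param in enumerate(params):
    offset = 0 if i % 4 < 2 else (2 * num_layers * bidirect)
    reordered[offset + (i // 4) * 2 + i % 2] = param
print(reordered)
# ['w_ih_l0', 'w_hh_l0', 'w_ih_l1', 'w_hh_l1', 'b_ih_l0', 'b_hh_l0', 'b_ih_l1', 'b_hh_l1']

# New logic: keep the parameters() order unchanged.
all_weights = list(params)
print(all_weights)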

tensorlayerx/backend/ops/tensorflow_nn.py

Lines changed: 0 additions & 1 deletion
@@ -2183,7 +2183,6 @@ def __init__(
         self.w_hh = w_hh
         self.b_ih = b_ih
         self.b_hh = b_hh
-
         self.act_fn = None
         if mode == 'LSTM':
             # gate_size = 4 * hidden_size

tensorlayerx/nn/core/core_tensorflow.py

Lines changed: 0 additions & 24 deletions
@@ -146,9 +146,6 @@ def __setattr__(self, name, value):
                 raise TypeError("Expected type is Module, but got Parameter.")
             self.insert_param_to_layer(name, value)

-        elif isinstance(value, ParameterTuple):
-            self.set_attr_for_parameter_tuple(name, value)
-
         elif isinstance(value, Module):
             if layers is None:
                 raise AttributeError("Can not assign layers before Module.__init__() call.")

@@ -297,27 +294,6 @@ def _compute_shape(tensors):
         shape_mem = tlx.get_tensor_shape(tensors)
         return shape_mem

-    def set_attr_for_parameter_tuple(self, name, value):
-        """Set attr for parameter in ParameterTuple."""
-        params = self.__dict__.get('_params')
-        params_tuple = self.__dict__.get('_params_tuple')
-        if params is None:
-            raise AttributeError("For 'Module', can not assign params before Module.__init__() is called.")
-        exist_names = set("")
-
-        for item in value:
-            self.insert_param_to_layer(item.name, item, check_name=False)
-            if item.name in exist_names:
-                raise ValueError("The value {} , its name '{}' already exists.".
-                                 format(value, item.name))
-            exist_names.add(item.name)
-
-        if name in self.__dict__:
-            del self.__dict__[name]
-        if name in params:
-            del params[name]
-        params_tuple[name] = value
-
     def insert_param_to_layer(self, param_name, param, check_name=True):
         """
         Adds a parameter to the current layer.

tensorlayerx/nn/layers/recurrent.py

Lines changed: 9 additions & 13 deletions
@@ -448,10 +448,10 @@ def __repr__(self):

     def build(self, inputs_shape):
         bidirect = 2 if self.bidirectional else 1
-        self.w_ih = []
-        self.w_hh = []
-        self.b_ih = []
-        self.b_hh = []
+        self.weight_ih = ParameterList()
+        self.weight_hh = ParameterList()
+        self.bias_ih = ParameterList()
+        self.bias_hh = ParameterList()
         stdv = 1.0 / np.sqrt(self.hidden_size)
         _init = tlx.nn.initializers.RandomUniform(minval=-stdv, maxval=stdv)
         if self.mode == 'LSTM':

@@ -465,37 +465,33 @@ def build(self, inputs_shape):
                 layer_input_size = self.input_size if layer == 0 else self.hidden_size * bidirect
                 suffix = '_reverse' if direction == 1 else ''

-                self.w_ih.append(
+                self.weight_ih.append(
                     self._get_weights(
                         var_name='weight_ih_l{}{}'.format(layer, suffix), shape=(gate_size, layer_input_size),
                         init=_init
                     )
                 )
-                self.w_hh.append(
+                self.weight_hh.append(
                     self._get_weights(
                         var_name='weight_hh_l{}{}'.format(layer, suffix), shape=(gate_size, self.hidden_size),
                         init=_init
                     )
                 )
                 if self.bias:
-                    self.b_ih.append(
+                    self.bias_ih.append(
                         self._get_weights(
                             var_name='bias_ih_l{}{}'.format(layer, suffix), shape=(gate_size, ), init=_init
                         )
                     )
-                    self.b_hh.append(
+                    self.bias_hh.append(
                         self._get_weights(
                             var_name='bias_hh_l{}{}'.format(layer, suffix), shape=(gate_size, ), init=_init
                         )
                     )
-        self.weight_ih = ParameterList(self.w_ih)
-        self.weight_hh = ParameterList(self.w_hh)
-        self.bias_ih = ParameterList(self.b_ih)
-        self.bias_hh =ParameterList(self.b_hh)
         self.rnn = tlx.ops.rnnbase(
             mode=self.mode, input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers,
             bias=self.bias, batch_first=self.batch_first, dropout=self.dropout, bidirectional=self.bidirectional,
-            is_train=self.is_train, w_ih=self.w_ih, w_hh=self.w_hh, b_ih=self.b_ih, b_hh=self.b_hh
+            is_train=self.is_train, w_ih=self.weight_ih, w_hh=self.weight_hh, b_ih=self.bias_ih, b_hh=self.bias_hh
         )

     def forward(self, input, states=None):
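
Replacing the plain Python lists (self.w_ih, self.w_hh, ...) with ParameterList containers presumably lets the module track the recurrent weights directly, which also appears to be what makes dropping the ParameterTuple branch in core_tensorflow.py safe. A framework-agnostic toy of that registration idea follows; ToyModule and ToyParameterList are hypothetical stand-ins, not the TensorLayerX classes.

class ToyParameterList(list):
    """Stand-in for a parameter container the module knows how to register."""

class ToyModule:

    def __init__(self):
        object.__setattr__(self, '_params', {})

    def __setattr__(self, name, value):
        if isinstance(value, ToyParameterList):
            # registered containers expose their contents to the module
            for i, param in enumerate(value):
                self._params['{}.{}'.format(name, i)] = param
        object.__setattr__(self, name, value)

net = ToyModule()
net.weight_ih = ToyParameterList(['w_ih_l0'])  # tracked by the module
net.w_ih = ['w_ih_l0']                         # plain list: invisible to _params
print(net._params)                             # {'weight_ih.0': 'w_ih_l0'}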
