#include <string>
#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/layers/simple_rnn_layer.hpp"
#include "caffe/util/format.hpp"  // for format_int
#include "caffe/util/math_functions.hpp"

namespace caffe {

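// The single recurrent input to the unrolled net is the initial hidden
// state, h_0.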
template <typename Dtype>
void SimpleRNNLayer<Dtype>::RecurrentInputBlobNames(
    vector<string>* names) const {
  names->resize(1);
  (*names)[0] = "h_0";
}

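// The single recurrent output is the hidden state after the final
// timestep, h_T.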
template <typename Dtype>
void SimpleRNNLayer<Dtype>::RecurrentOutputBlobNames(
    vector<string>* names) const {
  names->resize(1);
  (*names)[0] = "h_" + format_int(this->T_);
}

template <typename Dtype>
void SimpleRNNLayer<Dtype>::RecurrentInputShapes(
    vector<BlobShape>* shapes) const {
  const int num_output = this->layer_param_.recurrent_param().num_output();
  shapes->resize(1);
  (*shapes)[0].Clear();
  (*shapes)[0].add_dim(1);  // a single timestep
  (*shapes)[0].add_dim(this->N_);
  (*shapes)[0].add_dim(num_output);
}

template <typename Dtype>
void SimpleRNNLayer<Dtype>::OutputBlobNames(vector<string>* names) const {
  names->resize(1);
  (*names)[0] = "h";
}

template <typename Dtype>
void SimpleRNNLayer<Dtype>::FillUnrolledNet(NetParameter* net_param) const {
  const int num_output = this->layer_param_.recurrent_param().num_output();

  CHECK_GT(num_output, 0) << "num_output must be positive";
  const FillerParameter& weight_filler =
      this->layer_param_.recurrent_param().weight_filler();
  const FillerParameter& bias_filler =
      this->layer_param_.recurrent_param().bias_filler();

  // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
  // use to save redundant code.
  LayerParameter hidden_param;
  hidden_param.set_type("InnerProduct");
  hidden_param.mutable_inner_product_param()->set_num_output(num_output);
  hidden_param.mutable_inner_product_param()->set_bias_term(false);
  hidden_param.mutable_inner_product_param()->set_axis(2);
  hidden_param.mutable_inner_product_param()->
      mutable_weight_filler()->CopyFrom(weight_filler);

  LayerParameter biased_hidden_param(hidden_param);
  biased_hidden_param.mutable_inner_product_param()->set_bias_term(true);
  biased_hidden_param.mutable_inner_product_param()->
      mutable_bias_filler()->CopyFrom(bias_filler);

  LayerParameter sum_param;
  sum_param.set_type("Eltwise");
  sum_param.mutable_eltwise_param()->set_operation(
      EltwiseParameter_EltwiseOp_SUM);

  LayerParameter scale_param;
  scale_param.set_type("Scale");
  scale_param.mutable_scale_param()->set_axis(0);

  LayerParameter slice_param;
  slice_param.set_type("Slice");
  slice_param.mutable_slice_param()->set_axis(0);

  // Choose the activation layer type from the ONNX RNN "activations"
  // attribute; the default activation is TanH.
  LayerParameter F_activation_param;
  if (this->activations_.size() == 0 || this->activations_[0] == "Tanh" ||
      this->activations_[0] == "tanh") {
    F_activation_param.set_type("TanH");
  } else if (this->activations_[0] == "Elu" ||
             this->activations_[0] == "elu") {
    // ONNX name differs from the Caffe layer type name.
    F_activation_param.set_type("ELU");
    if (this->activation_alpha_.size() > 0) {
      F_activation_param.mutable_elu_param()->set_alpha(
          this->activation_alpha_[0]);
    }
  } else if (this->activations_[0] == "LeakyRelu") {
    F_activation_param.set_type("ReLU");
    if (this->activation_alpha_.size() > 0) {
      F_activation_param.mutable_relu_param()->set_negative_slope(
          this->activation_alpha_[0]);
    }
  } else if (this->activations_[0] == "Relu" ||
             this->activations_[0] == "relu") {
    F_activation_param.set_type("ReLU");
  } else if (this->activations_[0] == "ScaledTanh") {
    F_activation_param.set_type("ScaledTanH");
    if (this->activation_alpha_.size() > 0) {
      F_activation_param.mutable_scaled_tanh_param()->set_alpha(
          this->activation_alpha_[0]);
    }
    if (this->activation_beta_.size() > 0) {
      F_activation_param.mutable_scaled_tanh_param()->set_beta(
          this->activation_beta_[0]);
    }
  } else if (this->activations_[0] == "ThresholdedRelu") {
    F_activation_param.set_type("ThresholdedReLU");
    if (this->activation_alpha_.size() > 0) {
      F_activation_param.mutable_thresholded_relu_param()->set_alpha(
          this->activation_alpha_[0]);
    }
  } else if (this->activations_[0] == "HardSigmoid") {
    // ONNX name matches the Caffe layer type name.
    F_activation_param.set_type("HardSigmoid");
    if (this->activation_alpha_.size() > 0) {
      F_activation_param.mutable_hard_sigmoid_param()->set_alpha(
          this->activation_alpha_[0]);
    }
    if (this->activation_beta_.size() > 0) {
      F_activation_param.mutable_hard_sigmoid_param()->set_beta(
          this->activation_beta_[0]);
    }
  } else if (this->activations_[0] == "Sigmoid" ||
             this->activations_[0] == "sigmoid") {
    F_activation_param.set_type("Sigmoid");
  } else if (this->activations_[0] == "Softsign" ||
             this->activations_[0] == "softsign") {
    F_activation_param.set_type("Softsign");
  }

  vector<BlobShape> input_shapes;
  RecurrentInputShapes(&input_shapes);
  CHECK_EQ(1, input_shapes.size());

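  // Add an Input layer producing the recurrent input h_0 with shape
  // 1 x N x num_output (a single timestep of hidden state).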
  LayerParameter* input_layer_param = net_param->add_layer();
  input_layer_param->set_type("Input");
  InputParameter* input_param = input_layer_param->mutable_input_param();
  input_layer_param->add_top("h_0");
  input_param->add_shape()->CopyFrom(input_shapes[0]);

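  // Slice the sequence-continuation indicators along the time axis so that
  // each timestep t gets its own cont_t blob.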
  LayerParameter* cont_slice_param = net_param->add_layer();
  cont_slice_param->CopyFrom(slice_param);
  cont_slice_param->set_name("cont_slice");
  cont_slice_param->add_bottom("cont");
  cont_slice_param->mutable_slice_param()->set_axis(0);

  // Add layer to transform all timesteps of x to the hidden state dimension.
  //     W_xh_x = W_xh * x + b_h
  {
    LayerParameter* x_transform_param = net_param->add_layer();
    x_transform_param->CopyFrom(biased_hidden_param);
    x_transform_param->set_name("x_transform");
    x_transform_param->add_param()->set_name("W_xh");
    x_transform_param->add_param()->set_name("b_h");
    x_transform_param->add_bottom("x");
    x_transform_param->add_top("W_xh_x");
    x_transform_param->add_propagate_down(true);
  }

  if (this->static_input_) {
    // Add layer to transform x_static to the hidden state dimension.
    //     W_xh_x_static = W_xh_static * x_static
    LayerParameter* x_static_transform_param = net_param->add_layer();
    x_static_transform_param->CopyFrom(hidden_param);
    x_static_transform_param->mutable_inner_product_param()->set_axis(1);
    x_static_transform_param->set_name("W_xh_x_static");
    x_static_transform_param->add_param()->set_name("W_xh_static");
    x_static_transform_param->add_bottom("x_static");
    x_static_transform_param->add_top("W_xh_x_static_preshape");
    x_static_transform_param->add_propagate_down(true);

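    // Reshape W_xh_x_static_preshape from N x num_output to
    // 1 x N x num_output (a single timestep) so it can be summed with the
    // per-timestep terms below.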
    LayerParameter* reshape_param = net_param->add_layer();
    reshape_param->set_type("Reshape");
    BlobShape* new_shape =
        reshape_param->mutable_reshape_param()->mutable_shape();
    new_shape->add_dim(1);  // One timestep.
    // Should infer this->N as the dimension so we can reshape on batch size.
    new_shape->add_dim(-1);
    new_shape->add_dim(
        x_static_transform_param->inner_product_param().num_output());
    reshape_param->set_name("W_xh_x_static_reshape");
    reshape_param->add_bottom("W_xh_x_static_preshape");
    reshape_param->add_top("W_xh_x_static");
  }

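  // Slice W_xh_x along the time axis into per-timestep blobs W_xh_x_t,
  // which are consumed inside the unrolled loop below.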
  LayerParameter* x_slice_param = net_param->add_layer();
  x_slice_param->CopyFrom(slice_param);
  x_slice_param->set_name("W_xh_x_slice");
  x_slice_param->add_bottom("W_xh_x");

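  // Concatenate the per-timestep hidden states h_1, ..., h_T along the time
  // axis to form the layer output h; bottoms are added in the loop below.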
  LayerParameter output_concat_layer;
  output_concat_layer.set_name("h_concat");
  output_concat_layer.set_type("Concat");
  output_concat_layer.add_top("h");
  output_concat_layer.mutable_concat_param()->set_axis(0);

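  // Unroll the recurrence over the T_ timesteps; each iteration adds the
  // layers that compute h_t from h_{t-1} and W_xh_x_t.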
  for (int t = 1; t <= this->T_; ++t) {
    string tm1s = format_int(t - 1);
    string ts = format_int(t);

    cont_slice_param->add_top("cont_" + ts);
    x_slice_param->add_top("W_xh_x_" + ts);

    // Add layer to flush the hidden state when beginning a new sequence,
    // as indicated by cont_t.
    //     h_conted_{t-1} := cont_t * h_{t-1}
    //
    // Normally, cont_t is binary (i.e., 0 or 1), so:
    //     h_conted_{t-1} := h_{t-1} if cont_t == 1
    //                       0       otherwise
    {
      LayerParameter* cont_h_param = net_param->add_layer();
      cont_h_param->CopyFrom(scale_param);
      cont_h_param->set_name("h_conted_" + tm1s);
      cont_h_param->add_bottom("h_" + tm1s);
      cont_h_param->add_bottom("cont_" + ts);
      cont_h_param->add_top("h_conted_" + tm1s);
    }

    // Add layer to compute
    //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
    {
      LayerParameter* w_param = net_param->add_layer();
      w_param->CopyFrom(hidden_param);
      w_param->set_name("W_hh_h_" + tm1s);
      w_param->add_param()->set_name("W_hh");
      w_param->add_bottom("h_conted_" + tm1s);
      w_param->add_top("W_hh_h_" + tm1s);
      w_param->mutable_inner_product_param()->set_axis(2);
    }

    // Add layers to compute
    //     h_t := F_activation( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
    //          = F_activation( W_hh_h_{t-1} + W_xh_x_t )
    {
      LayerParameter* h_input_sum_param = net_param->add_layer();
      h_input_sum_param->CopyFrom(sum_param);
      h_input_sum_param->set_name("h_input_sum_" + ts);
      h_input_sum_param->add_bottom("W_hh_h_" + tm1s);
      h_input_sum_param->add_bottom("W_xh_x_" + ts);
      if (this->static_input_) {
        h_input_sum_param->add_bottom("W_xh_x_static");
      }
      h_input_sum_param->add_top("h_neuron_input_" + ts);
    }
    {
      LayerParameter* h_neuron_param = net_param->add_layer();
      h_neuron_param->CopyFrom(F_activation_param);
      h_neuron_param->set_name("h_neuron_" + ts);
      h_neuron_param->add_bottom("h_neuron_input_" + ts);
      h_neuron_param->add_top("h_" + ts);
    }
    output_concat_layer.add_bottom("h_" + ts);
  }  // for (int t = 1; t <= this->T_; ++t)

  net_param->add_layer()->CopyFrom(output_concat_layer);
}

INSTANTIATE_CLASS(SimpleRNNLayer);
REGISTER_LAYER_CLASS(SimpleRNN);

}  // namespace caffe