Skip to content

Commit 5bca34e

Browse files
committed
Merge remote-tracking branch 'upstream/master'
2 parents 32b5557 + 341486d commit 5bca34e

File tree

8 files changed

+102
-78
lines changed

8 files changed

+102
-78
lines changed

doc/algorithm/rnn/rnn.rst

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,15 @@ We also project the encoder vector to :code:`decoder_size` dimensional space, ge
142142
The decoder uses :code:`recurrent_group` to define the recurrent neural network. The step and output functions are defined in :code:`gru_decoder_with_attention`:
143143

144144
.. code-block:: python
145-
145+
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
146+
StaticInput(input=encoded_proj,is_seq=True)]
146147
trg_embedding = embedding_layer(
147148
input=data_layer(name='target_language_word',
148149
size=target_dict_dim),
149150
size=word_vector_dim,
150151
param_attr=ParamAttr(name='_target_language_embedding'))
152+
group_inputs.append(trg_embedding)
153+
151154
# For decoder equipped with attention mechanism, in training,
152155
# target embedding (the groundtruth) is the data input,
153156
# while encoded source sequence is accessed to as an unbounded memory.
@@ -156,13 +159,7 @@ The decoder uses :code:`recurrent_group` to define the recurrent neural network.
156159
# All sequence inputs should have the same length.
157160
decoder = recurrent_group(name=decoder_group_name,
158161
step=gru_decoder_with_attention,
159-
input=[
160-
StaticInput(input=encoded_vector,
161-
is_seq=True),
162-
StaticInput(input=encoded_proj,
163-
is_seq=True),
164-
trg_embedding
165-
])
162+
input=group_inputs)
166163
167164
168165
The implementation of the step function is listed as below. First, it defines the **memory** of the decoder network. Then it defines attention, gated recurrent unit step function, and the output function:
@@ -217,10 +214,8 @@ The code is listed below:
217214

218215
.. code-block:: python
219216
220-
gen_inputs = [StaticInput(input=encoded_vector,
221-
is_seq=True),
222-
StaticInput(input=encoded_proj,
223-
is_seq=True), ]
217+
group_inputs=[StaticInput(input=encoded_vector,is_seq=True),
218+
StaticInput(input=encoded_proj,is_seq=True)]
224219
# In generation, decoder predicts a next target word based on
225220
# the encoded source sequence and the last generated target word.
226221
# The encoded source sequence (encoder's output) must be specified by
@@ -231,10 +226,10 @@ The code is listed below:
231226
size=target_dict_dim,
232227
embedding_name='_target_language_embedding',
233228
embedding_size=word_vector_dim)
234-
gen_inputs.append(trg_embedding)
229+
group_inputs.append(trg_embedding)
235230
beam_gen = beam_search(name=decoder_group_name,
236231
step=gru_decoder_with_attention,
237-
input=gen_inputs,
232+
input=group_inputs,
238233
id_input=data_layer(name="sent_id",
239234
size=1),
240235
dict_file=trg_dict_path,

doc/ui/api/trainer_config_helpers/layers.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,12 @@ dotmul_projection
169169
:members: dotmul_projection
170170
:noindex:
171171

172+
dotmul_operator
173+
---------------
174+
.. automodule:: paddle.trainer_config_helpers.layers
175+
:members: dotmul_operator
176+
:noindex:
177+
172178
full_matrix_projection
173179
----------------------
174180
.. automodule:: paddle.trainer_config_helpers.layers

paddle/gserver/layers/CudnnConvLayer.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ bool CudnnConvLayer::init(const LayerMap &layerMap,
8585
biasOffset_ = numFilters_ / groups_[0];
8686
}
8787

88+
batchNum_ = 0;
8889
isSelectAlgo_ = false;
8990
return true;
9091
}
@@ -132,6 +133,11 @@ void CudnnConvLayer::reshape(int batchSize) {
132133
getOutput().setFrameHeight(outputH_);
133134
getOutput().setFrameWidth(outputW_);
134135

136+
// if the batchSize remains the same, set isSelectAlgo_ true.
137+
// Otherwise, set isSelectAlgo_ false and select algo again.
138+
isSelectAlgo_ = (batchSize == batchNum_);
139+
batchNum_ = batchSize;
140+
135141
size_t maxWorkSpace = 0;
136142
for (size_t i = 0; i < inputLayers_.size(); i++) {
137143
CHECK_EQ(inputLayers_[i]->getOutput().value->getWidth(),
@@ -160,6 +166,10 @@ void CudnnConvLayer::reshape(int batchSize) {
160166

161167
maxWorkSpace = std::max(fwdLimitBytes_[i], bwdDataLimitBytes_[i]);
162168
maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_[i]);
169+
170+
VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_[i]
171+
<< " / " << bwdDataAlgo_[i]
172+
<< " / " << bwdFilterAlgo_[i];
163173
}
164174
}
165175

paddle/gserver/layers/CudnnConvLayer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,10 @@ class CudnnConvLayer : public ConvBaseLayer {
8787
/// Whether or not to select the conv algorithm.
8888
bool isSelectAlgo_;
8989

90+
/// batchNum is used to record batch size. If the batch size is changed,
91+
/// the selection algorithm will be called.
92+
int batchNum_;
93+
9094
public:
9195
explicit CudnnConvLayer(const LayerConfig& config) : ConvBaseLayer(config) {}
9296

paddle/gserver/layers/MultinomialSampler.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ namespace paddle {
1919

2020
MultinomialSampler::MultinomialSampler(const real* prob, int size)
2121
: rand_(0.0, size) {
22-
intervals_.reserve(size + 1);
22+
intervals_.resize(size + 1);
2323
double sum = 0;
2424
for (int i = 0; i < size; ++i) {
2525
sum += prob[i];
@@ -50,12 +50,13 @@ MultinomialSampler::MultinomialSampler(const real* prob, int size)
5050
int bigPos = nextBigPos(0);
5151

5252
auto fillIntervals = [&]() {
53-
while (bigPos < size && smallPos < size) {
53+
while (bigPos < size) {
5454
while (intervals_[bigPos].thresh > 1 && smallPos < size) {
5555
intervals_[smallPos].otherId = bigPos;
5656
intervals_[bigPos].thresh -= 1 - intervals_[smallPos].thresh;
5757
smallPos = nextSmallPos(smallPos + 1);
5858
}
59+
if (smallPos >= size) break;
5960
bigPos = nextBigPos(bigPos + 1);
6061
// If intervals_[bigPos].thresh < 1, it becomes a small interval
6162
}

paddle/gserver/tests/test_MultinomialSampler.cpp

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -41,39 +41,42 @@ class MultinomialSamplerTester : public MultinomialSampler {
4141
TEST(MultinomialSampler, gen) {
4242
int numGrids = 1024 * 1024;
4343
int size = 1024 * 4;
44-
4544
default_random_engine reng;
46-
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
47-
vector<real> prob;
48-
int sum = 0;
49-
for (int i = 0; i < size; ++i) {
50-
prob.push_back(rand(reng));
51-
sum += prob.back();
52-
}
53-
CHECK_LE(sum, numGrids);
54-
prob.back() += numGrids - sum;
5545

56-
vector<int> counts(size);
57-
MultinomialSamplerTester sampler(&prob[0], size);
58-
counts.assign(size, 0);
59-
{
60-
double s = (double)size / (double)numGrids;
61-
REGISTER_TIMER("MultinomialSampler");
62-
for (double i = 0; i < numGrids; ++i) {
63-
int ret = sampler.testGen([i, s]() { return s * i; });
64-
if (ret < 0 || ret >= size) {
65-
EXPECT_GE(ret, 0);
66-
EXPECT_LT(ret, size);
67-
break;
46+
for (size_t iter=0; iter < 256; ++iter) {
47+
uniform_int_distribution<int> rand(1, numGrids / size * 1.8);
48+
vector<real> prob;
49+
int sum = 0;
50+
for (int i = 0; i < size; ++i) {
51+
prob.push_back(rand(reng));
52+
sum += prob.back();
53+
}
54+
55+
CHECK_LE(sum, numGrids);
56+
prob.back() += numGrids - sum;
57+
58+
vector<int> counts(size);
59+
MultinomialSamplerTester sampler(&prob[0], size);
60+
counts.assign(size, 0);
61+
{
62+
double s = (double)size / (double)numGrids;
63+
REGISTER_TIMER("MultinomialSampler");
64+
for (double i = 0; i < numGrids; ++i) {
65+
int ret = sampler.testGen([i, s]() { return s * i; });
66+
if (ret < 0 || ret >= size) {
67+
EXPECT_GE(ret, 0);
68+
EXPECT_LT(ret, size);
69+
break;
70+
}
71+
++counts[ret];
6872
}
69-
++counts[ret];
7073
}
71-
}
72-
for (int i = 0; i < size; ++i) {
73-
if (prob[i] != counts[i]) {
74-
EXPECT_EQ(prob[i], counts[i]);
75-
LOG(INFO) << "i=" << i;
76-
break;
74+
for (int i = 0; i < size; ++i) {
75+
if (prob[i] != counts[i]) {
76+
EXPECT_EQ(prob[i], counts[i]);
77+
LOG(INFO) << iter;
78+
break;
79+
}
7780
}
7881
}
7982
}
@@ -135,6 +138,7 @@ void benchmarkRandom() {
135138
LOG(INFO) << "sum1=" << sum1;
136139
}
137140

141+
138142
int main(int argc, char** argv) {
139143
initMain(argc, argv);
140144
testing::InitGoogleTest(&argc, argv);

python/paddle/trainer/config_parser.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -636,7 +636,6 @@ def __init__(
636636
input_layer_names,
637637
):
638638
self.add_keys(locals())
639-
640639
self.operator_conf = OperatorConfig()
641640
self.operator_conf.type = self.type
642641

@@ -686,12 +685,15 @@ def __init__(
686685
if num_filters is not None:
687686
self.operator_conf.num_filters = num_filters
688687

689-
parse_conv(conv_conf, input_layer_names[0], self.operator_conf.conv_conf, True)
688+
parse_conv(conv_conf,
689+
MakeLayerNameInSubmodel(input_layer_names[0]),
690+
self.operator_conf.conv_conf)
690691
self.operator_conf.output_size = (self.operator_conf.conv_conf.output_x ** 2) * num_filters
691692

692693
config_assert(len(input_layer_names) == 2, "Conv is binary operator")
693694

694-
695+
def calc_output_size(self, input_sizes):
696+
return self.operator_conf.output_size
695697

696698

697699
# please refer to the comments in proto/ModelConfig.proto
@@ -2462,11 +2464,11 @@ def __init__(
24622464
if size != 0:
24632465
self.set_layer_size(size)
24642466
else:
2465-
size = operator.calc_output_size(operator_conf.input_sizes)
2466-
if size != 0:
2467-
config_assert(size == self.config.size,
2467+
sz = operator.calc_output_size(operator_conf.input_sizes)
2468+
if sz != 0:
2469+
config_assert(sz == self.config.size,
24682470
"different inputs have different size: %s vs. %s" %
2469-
(size, self.config.size))
2471+
(sz, self.config.size))
24702472
for input_index in xrange(len(self.inputs)):
24712473
input_layer = self.get_input_layer(input_index)
24722474
input = self.inputs[input_index]

0 commit comments

Comments
 (0)