Skip to content

Commit e2c0713

Browse files
committed
follow comments
1 parent db569f2 commit e2c0713

File tree

6 files changed

+81
-78
lines changed

6 files changed

+81
-78
lines changed

paddle/cuda/include/hl_cnn.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ extern void hl_expand_feature2col(
9191
* @param[in] paddingH padding height.
9292
* @param[in] paddingW padding width.
9393
* @param[out] tgtData output data.
94-
* @param[in] tgtStride output data stride.
94+
* @param[in] tgtStride stride between output data samples.
9595
*
9696
*/
9797
extern void hl_maxpool_forward(
@@ -125,7 +125,7 @@ extern void hl_maxpool_forward(
125125
* @param[in] paddingH padding height.
126126
* @param[in] paddingW padding width.
127127
* @param[out] targetGrad output grad.
128-
* @param[in] outStride output grad data stride.
128+
* @param[in] outStride stride between output data samples.
129129
*
130130
*/
131131
extern void hl_maxpool_backward(
@@ -157,7 +157,7 @@ extern void hl_maxpool_backward(
157157
* @param[in] paddingH padding height.
158158
* @param[in] paddingW padding width.
159159
* @param[out] tgtData output data.
160-
* @param[in] tgtStride output data stride.
160+
* @param[in] tgtStride stride between output data samples.
161161
*
162162
*/
163163
extern void hl_avgpool_forward(
@@ -189,7 +189,7 @@ extern void hl_avgpool_forward(
189189
* @param[in] scaleA scale.
190190
* @param[in] scaleB scale.
191191
* @param[out] backGrad output grad.
192-
* @param[in] outStride output grad data stride.
192+
* @param[in] outStride stride between output data samples.
193193
*
194194
*/
195195
extern void hl_avgpool_backward(

paddle/gserver/layers/PoolProjection.cpp

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ PoolProjection* PoolProjection::create(const ProjectionConfig& config,
3434
void MaxPoolProjection::forward() {
3535
MatrixPtr inputV = in_->value;
3636
MatrixPtr outV = out_->value;
37-
outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
38-
sizeX_, sizeY_, strideY_, stride_,
39-
outputY_, outputX_, confPaddingY_, confPadding_);
37+
outV->maxPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
38+
strideY_, stride_, outputY_, outputX_, confPaddingY_,
39+
confPadding_);
4040
}
4141

4242
void MaxPoolProjection::backward(const UpdateCallback& callback) {
@@ -50,17 +50,16 @@ void MaxPoolProjection::backward(const UpdateCallback& callback) {
5050
return;
5151
}
5252
inputGrad->maxPoolBackward(*inputV, imgSizeY_, imgSize_, *outGrad, *outV,
53-
sizeX_, sizeY_,
54-
strideY_, stride_, outputY_, outputX_, 1, 1,
55-
confPaddingY_, confPadding_);
53+
sizeX_, sizeY_, strideY_, stride_, outputY_,
54+
outputX_, 1, 1, confPaddingY_, confPadding_);
5655
}
5756

5857
void AvgPoolProjection::forward() {
5958
MatrixPtr inputV = in_->value;
6059
MatrixPtr outV = out_->value;
61-
outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_,
62-
sizeX_, sizeY_, strideY_, stride_,
63-
outputY_, outputX_, confPaddingY_, confPadding_);
60+
outV->avgPoolForward(*inputV, imgSizeY_, imgSize_, channels_, sizeX_, sizeY_,
61+
strideY_, stride_, outputY_, outputX_, confPaddingY_,
62+
confPadding_);
6463
}
6564

6665
void AvgPoolProjection::backward(const UpdateCallback& callback) {
@@ -73,9 +72,8 @@ void AvgPoolProjection::backward(const UpdateCallback& callback) {
7372
return;
7473
}
7574

76-
inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_,
77-
sizeX_, sizeY_, strideY_, stride_,
78-
outputY_, outputX_, 1, 1,
75+
inputGrad->avgPoolBackward(*outputGrad, imgSizeY_, imgSize_, sizeX_, sizeY_,
76+
strideY_, stride_, outputY_, outputX_, 1, 1,
7977
confPaddingY_, confPadding_);
8078
}
8179
} // namespace paddle

paddle/gserver/layers/PoolProjection.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ limitations under the License. */
1515
#pragma once
1616

1717
#include "Projection.h"
18+
#include "paddle/math/MathUtils.h"
1819

1920
namespace paddle {
2021

paddle/gserver/layers/SpatialPyramidPoolLayer.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,15 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
5656
size_t SpatialPyramidPoolLayer::getSize() {
5757
CHECK_EQ(inputLayers_.size(), 1UL);
5858
size_t layerSize = 0;
59+
const SppConfig& sppConf = config_.inputs(0).spp_conf();
5960
imgSizeH_ = inputLayers_[0]->getOutput().getFrameHeight();
6061
imgSizeW_ = inputLayers_[0]->getOutput().getFrameWidth();
62+
if (imgSizeH_ == 0) {
63+
imgSizeH_ = sppConf.has_img_size_y() ? sppConf.img_size_y() : imgSizeW_;
64+
}
65+
if (imgSizeW_ == 0) {
66+
imgSizeW_ = sppConf.img_size();
67+
}
6168

6269
size_t outputH = 1;
6370
size_t outputW = (std::pow(4, pyramidHeight_) - 1) / (4 - 1);
@@ -66,10 +73,10 @@ size_t SpatialPyramidPoolLayer::getSize() {
6673

6774
getOutput().setFrameHeight(outputH);
6875
getOutput().setFrameWidth(outputW);
76+
6977
return layerSize;
7078
}
7179

72-
7380
bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
7481
const ParameterMap& parameterMap) {
7582
Layer::init(layerMap, parameterMap);
@@ -90,8 +97,8 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
9097
size_t endCol = 0;
9198
for (size_t i = 0; i < pyramidHeight_; i++) {
9299
poolProjections_.emplace_back(PoolProjection::create(
93-
getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_),
94-
nullptr, useGpu_));
100+
getConfig(imgSizeW_, imgSizeH_, channels_, i, poolType_), nullptr,
101+
useGpu_));
95102
endCol += poolProjections_[i]->getOutputSize();
96103
projCol_.push_back(std::make_pair(startCol, endCol));
97104
startCol = endCol;
@@ -125,4 +132,3 @@ void SpatialPyramidPoolLayer::backward(const UpdateCallback& callback) {
125132
}
126133

127134
} // namespace paddle
128-

python/paddle/trainer_config_helpers/layers.py

Lines changed: 56 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@
5656
'rank_cost', 'lambda_cost', 'huber_cost',
5757
'block_expand_layer',
5858
'maxout_layer', 'out_prod_layer', 'print_layer',
59-
# 'spp_layer',
59+
'spp_layer',
6060
]
6161

6262

@@ -112,7 +112,7 @@ class LayerType(object):
112112
LINEAR_COMBINATION_LAYER = "convex_comb"
113113
BLOCK_EXPAND = "blockexpand"
114114
MAXOUT = "maxout"
115-
# SPP_LAYER = "spp"
115+
SPP_LAYER = "spp"
116116

117117
PRINT_LAYER = "print"
118118

@@ -1711,60 +1711,60 @@ def img_pool_layer(input, pool_size, name=None,
17111711
num_filters=num_channels)
17121712

17131713

1714-
# @wrap_name_default("spp")
1715-
# @layer_support()
1716-
# def spp_layer(input, name=None, num_channels=None, pool_type=None,
1717-
# pyramid_height=None, img_width=None, layer_attr=None):
1718-
# pass
1719-
# """
1720-
# Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
1721-
# The details please refer to
1722-
# `Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
1723-
1724-
# :param name: layer name.
1725-
# :type name: basestring
1726-
# :param input: layer's input.
1727-
# :type input: LayerOutput
1728-
# :param num_channels: number of input channel.
1729-
# :type num_channels: int
1730-
# :param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling.
1731-
# :type scale: BasePoolingType
1732-
# :param pyramid_height: pyramid height.
1733-
# :type pyramid_height: int
1734-
# :param img_width: the width of input feature map. If it is None, the input feature
1735-
# map should be square.
1736-
# :type img_width: int|None
1737-
# :param layer_attr: Extra Layer Attribute.
1738-
# :type layer_attr: ExtraLayerAttribute
1739-
# :return: LayerOutput object.
1740-
# :rtype: LayerOutput
1741-
# """
1742-
# if num_channels is None:
1743-
# assert input.num_filters is not None
1744-
# num_channels = input.num_filters
1745-
1746-
# if pool_type is None:
1747-
# pool_type = MaxPooling()
1748-
# elif isinstance(pool_type, AvgPooling):
1749-
# pool_type.name = 'avg'
1750-
1751-
# type_name = pool_type.name
1752-
# if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)):
1753-
# type_name += '-projection'
1754-
1755-
# Layer(
1756-
# name=name,
1757-
# type=LayerType.SPP_LAYER,
1758-
# inputs=Input(input.name,
1759-
# spp=SpatialPyramidPool(pool_type=type_name,
1760-
# channels=num_channels,
1761-
# pyramid_height=pyramid_height,
1762-
# img_width=img_width)
1763-
# ),
1764-
# **ExtraLayerAttribute.to_kwargs(layer_attr)
1765-
# )
1766-
# return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
1767-
# num_filters=num_channels)
1714+
@wrap_name_default("spp")
1715+
@layer_support()
1716+
def spp_layer(input, name=None, num_channels=None, pool_type=None,
1717+
pyramid_height=None, img_width=None, layer_attr=None):
1718+
pass
1719+
"""
1720+
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
1721+
The details please refer to
1722+
`Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
1723+
1724+
:param name: layer name.
1725+
:type name: basestring
1726+
:param input: layer's input.
1727+
:type input: LayerOutput
1728+
:param num_channels: number of input channel.
1729+
:type num_channels: int
1730+
:param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling.
1731+
:type scale: BasePoolingType
1732+
:param pyramid_height: pyramid height.
1733+
:type pyramid_height: int
1734+
:param img_width: the width of input feature map. If it is None, the input feature
1735+
map should be square.
1736+
:type img_width: int|None
1737+
:param layer_attr: Extra Layer Attribute.
1738+
:type layer_attr: ExtraLayerAttribute
1739+
:return: LayerOutput object.
1740+
:rtype: LayerOutput
1741+
"""
1742+
if num_channels is None:
1743+
assert input.num_filters is not None
1744+
num_channels = input.num_filters
1745+
1746+
if pool_type is None:
1747+
pool_type = MaxPooling()
1748+
elif isinstance(pool_type, AvgPooling):
1749+
pool_type.name = 'avg'
1750+
1751+
type_name = pool_type.name
1752+
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)):
1753+
type_name += '-projection'
1754+
1755+
Layer(
1756+
name=name,
1757+
type=LayerType.SPP_LAYER,
1758+
inputs=Input(input.name,
1759+
spp=SpatialPyramidPool(pool_type=type_name,
1760+
channels=num_channels,
1761+
pyramid_height=pyramid_height,
1762+
img_width=img_width)
1763+
),
1764+
**ExtraLayerAttribute.to_kwargs(layer_attr)
1765+
)
1766+
return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
1767+
num_filters=num_channels)
17681768

17691769

17701770
def __img_norm_layer__(name, input, size, norm_type, scale, power,

python/paddle/trainer_config_helpers/tests/configs/generate_protostr.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,9 @@ test_sequence_pooling test_lstmemory_layer test_grumemory_layer
1111
last_first_seq test_expand_layer test_ntm_layers test_hsigmoid
1212
img_layers util_layers simple_rnn_layers unused_layers test_cost_layers
1313
test_rnn_group shared_fc shared_lstm test_cost_layers_with_weight
14-
# test_maxout test_bi_grumemory math_ops test_spp_layer)
1514
test_maxout test_bi_grumemory math_ops test_spp_layer)
1615

1716

18-
1917
for conf in ${configs[*]}
2018
do
2119
echo "Generating " $conf

0 commit comments

Comments (0)