Skip to content

Commit 5ece5c9

Browse files
committed
add python wrap for sppLayer
1 parent b282caf commit 5ece5c9

File tree

12 files changed

+155
-15
lines changed

12 files changed

+155
-15
lines changed

doc/ui/api/trainer_config_helpers/layers.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@ conv_operator
4646
:members: conv_operator
4747
:noindex:
4848

49+
conv_projection
50+
---------------
51+
.. automodule:: paddle.trainer_config_helpers.layers
52+
:members: conv_projection
53+
:noindex:
54+
4955
conv_shift_layer
5056
------------------
5157
.. automodule:: paddle.trainer_config_helpers.layers
@@ -71,6 +77,12 @@ img_pool_layer
7177
--------------
7278
.. automodule:: paddle.trainer_config_helpers.layers
7379
:members: img_pool_layer
80+
:noindex:
81+
82+
spp_layer
83+
--------------
84+
.. automodule:: paddle.trainer_config_helpers.layers
85+
:members: spp_layer
7486
:noindex:
7587

7688
maxout_layer

paddle/gserver/layers/PoolProjection.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ limitations under the License. */
1616

1717
namespace paddle {
1818

19-
REGISTER_PROJECTION_CREATE_FUNC(pool2, &PoolProjection::create);
19+
REGISTER_PROJECTION_CREATE_FUNC(pool, &PoolProjection::create);
2020

2121
PoolProjection* PoolProjection::create(const ProjectionConfig& config,
2222
ParameterPtr parameter, bool useGpu) {

paddle/gserver/layers/SpatialPyramidPoolLayer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ ProjectionConfig SpatialPyramidPoolLayer::getConfig(size_t imgSizeW,
2424
size_t pyramidLevel,
2525
std::string& poolType) {
2626
ProjectionConfig config;
27-
config.set_type("pool2");
27+
config.set_type("pool");
2828
PoolConfig* conf = config.mutable_pool_conf();
2929
conf->set_channels(channels);
3030
conf->set_img_size(imgSizeW);
@@ -93,15 +93,15 @@ bool SpatialPyramidPoolLayer::init(const LayerMap& layerMap,
9393
startCol = endCol;
9494
projInput_.emplace_back(Argument());
9595
}
96-
outputSize_ = endCol;
96+
CHECK_EQ(endCol, getSize());
9797
return true;
9898
}
9999

100100
void SpatialPyramidPoolLayer::forward(PassType passType) {
101101
Layer::forward(passType);
102102

103103
int batchSize = getInput(0).getBatchSize();
104-
resetOutput(batchSize, outputSize_);
104+
resetOutput(batchSize, getSize());
105105
for (size_t i = 0; i < pyramidHeight_; i++) {
106106
size_t startCol = projCol_[i].first;
107107
size_t endCol = projCol_[i].second;

paddle/gserver/layers/SpatialPyramidPoolLayer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ class SpatialPyramidPoolLayer : public Layer {
2727
size_t imgSizeW_;
2828
size_t imgSizeH_;
2929
size_t pyramidHeight_;
30-
size_t outputSize_;
3130
std::string poolType_;
3231

3332
std::vector<std::unique_ptr<PoolProjection>> poolProjections_;

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,8 @@ void testSppLayer(const string& poolType, const int pyramidHeight, bool trans,
931931
sppConfig->set_channels(16);
932932
sppConfig->set_img_size(10);
933933
sppConfig->set_img_size_y(20);
934+
int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1);
935+
config.layerConfig.set_size(outputSize * sppConfig->channels());
934936
testLayerGrad(config, "spp", 100, trans, useGpu);
935937
}
936938

paddle/math/Matrix.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1510,18 +1510,19 @@ void CpuMatrix::maxPoolForward(Matrix& inputMat, size_t imgSizeH,
15101510
CHECK(inHeight * inWidth == inputMat.getWidth() / channels);
15111511
CHECK_EQ(num, this->getHeight());
15121512
CHECK_EQ(channels * outputH * outputW, this->getWidth());
1513+
size_t outStride = getStride();
15131514

15141515
/* initialize the data_ */
15151516
for (size_t i = 0; i < height_; i++) {
15161517
for (size_t j = 0; j < width_; j++) {
1517-
outData[i * getStride() + j] = -(real)FLT_MAX;
1518+
outData[i * outStride + j] = -(real)FLT_MAX;
15181519
}
15191520
}
15201521

15211522
/* pool max one by one */
15221523
for (size_t n = 0; n < num; ++n) { // frame by frame
15231524
if (!isContiguous()) {
1524-
outData = data_ + n * getStride();
1525+
outData = data_ + n * outStride;
15251526
}
15261527
for (size_t c = 0; c < channels; ++c) { // channel by channel
15271528
for (size_t ph = 0; ph < outputH; ++ph) {
@@ -1564,10 +1565,15 @@ void CpuMatrix::maxPoolBackward(Matrix& image, size_t imgSizeH, size_t imgSizeW,
15641565
real* inData = image.getData();
15651566
real* otData = outV.getData();
15661567
real* otGrad = outGrad.getData();
1568+
1569+
size_t outStride = outV.getStride();
1570+
real* origOutData = otData;
1571+
real* origOutGrad = otGrad;
1572+
15671573
for (size_t n = 0; n < num; ++n) {
15681574
if (!outV.isContiguous()) {
1569-
otData = outV.getData() + n * outV.getStride();
1570-
otGrad = outGrad.getData() + n * outGrad.getStride();
1575+
otData = origOutData + n * outStride;
1576+
otGrad = origOutGrad + n * outStride;
15711577
}
15721578
for (size_t c = 0; c < channels; ++c) {
15731579
for (size_t ph = 0; ph < outputH; ++ph) {

proto/ModelConfig.proto.m4

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,11 +202,11 @@ message ProjectionConfig {
202202
optional ConvConfig conv_conf = 8;
203203
optional int32 num_filters = 9;
204204

205-
// For pool
206-
optional PoolConfig pool_conf = 10;
207-
208205
// For IdentityOffsetProjection
209206
optional uint64 offset = 11 [default = 0];
207+
208+
// For pool
209+
optional PoolConfig pool_conf = 12;
210210
}
211211

212212
message OperatorConfig {

python/paddle/trainer/config_parser.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,7 @@ def __init__(
470470
image=None,
471471
block_expand=None,
472472
maxout=None,
473+
spp=None,
473474
format=None,
474475
nnz=None,
475476
is_static=None,
@@ -669,7 +670,6 @@ def calc_bias_size(self):
669670
def calc_parameter_dims(self, input_size, output_size):
670671
return None
671672

672-
673673
# Define a operator for mixed layer
674674
@config_class
675675
class Operator(Cfg):
@@ -783,6 +783,15 @@ def __init__(
783783
padding_y = None):
784784
self.add_keys(locals())
785785

786+
class SpatialPyramidPool(Cfg):
787+
def __init__(
788+
self,
789+
pool_type,
790+
pyramid_height,
791+
channels,
792+
img_width = None):
793+
self.add_keys(locals())
794+
786795
# please refer to the comments in proto/ModelConfig.proto
787796
@config_class
788797
class Norm(Cfg):
@@ -1043,6 +1052,22 @@ def parse_pool(pool, input_layer_name, pool_conf):
10431052
2*pool_conf.padding_y - pool_conf.size_y) / \
10441053
float(pool_conf.stride_y))) + 1
10451054

1055+
def parse_spp(spp, input_layer_name, spp_conf):
1056+
spp_conf.pool_type = spp.pool_type
1057+
config_assert(spp.pool_type in ['max-projection', 'avg-projection'],
1058+
"pool-type %s is not in " "['max-projection', 'avg-projection']"
1059+
% spp.pool_type)
1060+
spp_conf.pyramid_height = spp.pyramid_height
1061+
spp_conf.channels = spp.channels
1062+
1063+
img_pixels = g_layer_map[input_layer_name].size / spp_conf.channels
1064+
1065+
spp_conf.img_size = default(spp.img_width, int(img_pixels ** 0.5))
1066+
spp_conf.img_size_y = img_pixels / spp_conf.img_size
1067+
config_assert(spp_conf.img_size * spp_conf.img_size_y == img_pixels,
1068+
"Incorrect input image size %d for input image pixels %d"
1069+
% (spp_conf.img_size, img_pixels))
1070+
10461071
def parse_image(image, input_layer_name, image_conf):
10471072
image_conf.channels = image.channels
10481073
image_pixels = g_layer_map[input_layer_name].size / image_conf.channels
@@ -1649,6 +1674,25 @@ def __init__(
16491674
name, pool_conf.output_y, pool_conf.output_x))
16501675
self.set_layer_size((pool_conf.output_x * pool_conf.output_y) * pool_conf.channels)
16511676

1677+
@config_layer('spp')
1678+
class SpatialPyramidPoolLayer(LayerBase):
1679+
def __init__(
1680+
self,
1681+
name,
1682+
inputs,
1683+
device=None):
1684+
super(SpatialPyramidPoolLayer, self).__init__(name, 'spp', 0, inputs=inputs, device=device)
1685+
for input_index in xrange(len(self.inputs)):
1686+
input_layer = self.get_input_layer(input_index)
1687+
parse_spp(
1688+
self.inputs[input_index].spp,
1689+
input_layer.name,
1690+
self.config.inputs[input_index].spp_conf)
1691+
spp_conf = self.config.inputs[input_index].spp_conf
1692+
output_size = (pow(4, spp_conf.pyramid_height) - 1) / (4 - 1)
1693+
print("output size for %s is %d " % (name, output_size))
1694+
self.set_layer_size(output_size * spp_conf.channels)
1695+
16521696
@config_layer('batch_norm')
16531697
class BatchNormLayer(LayerBase):
16541698
layer_type = 'batch_norm'

python/paddle/trainer_config_helpers/layers.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@
5555
'multi_binary_label_cross_entropy',
5656
'rank_cost', 'lambda_cost', 'huber_cost',
5757
'block_expand_layer',
58-
'maxout_layer', 'out_prod_layer', 'print_layer'
58+
'maxout_layer', 'out_prod_layer', 'print_layer',
59+
'spp_layer',
5960
]
6061

6162

@@ -111,6 +112,7 @@ class LayerType(object):
111112
LINEAR_COMBINATION_LAYER = "convex_comb"
112113
BLOCK_EXPAND = "blockexpand"
113114
MAXOUT = "maxout"
115+
SPP_LAYER = "spp"
114116

115117
PRINT_LAYER = "print"
116118

@@ -868,6 +870,7 @@ def pooling_layer(input, pooling_type=None, name=None, bias_attr=None,
868870
size=input.size)
869871

870872

873+
871874
@wrap_bias_attr_default()
872875
@wrap_param_attr_default()
873876
@wrap_act_default(param_names=['gate_act'],
@@ -1708,6 +1711,62 @@ def img_pool_layer(input, pool_size, name=None,
17081711
num_filters=num_channels)
17091712

17101713

1714+
@wrap_name_default("spp")
1715+
@layer_support()
1716+
def spp_layer(input, name=None, num_channels=None, pool_type=None,
1717+
pyramid_height=None, img_width=None, layer_attr=None):
1718+
pass
1719+
"""
1720+
Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition.
1721+
For details, please refer to
1722+
`Kaiming He's paper <https://arxiv.org/abs/1406.4729>`_.
1723+
1724+
:param name: layer name.
1725+
:type name: basestring
1726+
:param input: layer's input.
1727+
:type input: LayerOutput
1728+
:param num_channels: number of input channel.
1729+
:type num_channels: int
1730+
:param pool_type: Pooling type. MaxPooling or AveragePooling. Default is MaxPooling.
1731+
:type pool_type: BasePoolingType
1732+
:param pyramid_height: pyramid height.
1733+
:type pyramid_height: int
1734+
:param img_width: the width of input feature map. If it is None, the input feature
1735+
map should be square.
1736+
:type img_width: int|None
1737+
:param layer_attr: Extra Layer Attribute.
1738+
:type layer_attr: ExtraLayerAttribute
1739+
:return: LayerOutput object.
1740+
:rtype: LayerOutput
1741+
"""
1742+
if num_channels is None:
1743+
assert input.num_filters is not None
1744+
num_channels = input.num_filters
1745+
1746+
if pool_type is None:
1747+
pool_type = MaxPooling()
1748+
elif isinstance(pool_type, AvgPooling):
1749+
pool_type.name = 'avg'
1750+
1751+
type_name = pool_type.name
1752+
if (isinstance(pool_type, AvgPooling) or isinstance(pool_type, MaxPooling)):
1753+
type_name += '-projection'
1754+
1755+
Layer(
1756+
name=name,
1757+
type=LayerType.SPP_LAYER,
1758+
inputs=Input(input.name,
1759+
spp=SpatialPyramidPool(pool_type=type_name,
1760+
channels=num_channels,
1761+
pyramid_height=pyramid_height,
1762+
img_width=img_width)
1763+
),
1764+
**ExtraLayerAttribute.to_kwargs(layer_attr)
1765+
)
1766+
return LayerOutput(name, LayerType.SPP_LAYER, parents=[input],
1767+
num_filters=num_channels)
1768+
1769+
17111770
def __img_norm_layer__(name, input, size, norm_type, scale, power,
17121771
num_channels, blocked, layer_attr):
17131772
if num_channels is None:

python/paddle/trainer_config_helpers/tests/configs/check.md5

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ fded24727338fb8ce44d9951ed8aea08 test_rnn_group.protostr
2020
67d6fde3afb54f389d0ce4ff14726fe1 test_sequence_pooling.protostr
2121
f586a548ef4350ba1ed47a81859a64cb unused_layers.protostr
2222
f937a5a6e7e8864b4d8cf56b0f7c7f44 util_layers.protostr
23+
60c9a71e19bd4b2a1253712799d0ae70 test_spp_layer.protostr

0 commit comments

Comments
 (0)