Skip to content

Commit 5961b52

Browse files
authored
Merge pull request #1653 from Noplz/normalize-layer
CrossChannelNorm Layer for SSD
2 parents bfc3310 + 21b7f4a commit 5961b52

File tree

11 files changed

+279
-16
lines changed

11 files changed

+279
-16
lines changed

doc/api/v2/config/layer.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,12 @@ sum_to_one_norm
109109
:members: sum_to_one_norm
110110
:noindex:
111111

112+
cross_channel_norm
113+
------------------
114+
.. automodule:: paddle.v2.layer
115+
:members: cross_channel_norm
116+
:noindex:
117+
112118
Recurrent Layers
113119
================
114120

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "Layer.h"
16+
#include "NormLayer.h"
17+
#include "paddle/math/BaseMatrix.h"
18+
#include "paddle/math/Matrix.h"
19+
20+
namespace paddle {
21+
22+
// Returns a non-owning (channels_ x spatialDim) matrix view over sample
// `iter` of `data`, whose rows are laid out as
// [sample0: channels*spatialDim][sample1: ...].
// The view shares memory with `data`, so writes through it are visible to
// the caller; no data is copied.
MatrixPtr CrossChannelNormLayer::createSampleMatrix(MatrixPtr data,
                                                    size_t iter,
                                                    size_t spatialDim) {
  return Matrix::create(data->getData() + iter * channels_ * spatialDim,
                        channels_,
                        spatialDim,
                        false,
                        useGpu_);
}
31+
32+
// Returns a non-owning (1 x spatialDim) matrix view over row `iter` of
// `data` (used for per-sample rows of normBuffer_). Shares memory with
// `data`; no copy is made.
MatrixPtr CrossChannelNormLayer::createSpatialMatrix(MatrixPtr data,
                                                     size_t iter,
                                                     size_t spatialDim) {
  return Matrix::create(
      data->getData() + iter * spatialDim, 1, spatialDim, false, useGpu_);
}
38+
39+
// Forward pass: for every spatial position of every sample, L2-normalize
// the vector formed across the channel dimension, then multiply each
// channel by its trainable scale factor (scale_, one value per channel).
// The per-position norms are cached in normBuffer_ for use in backward().
void CrossChannelNormLayer::forward(PassType passType) {
  Layer::forward(passType);
  MatrixPtr inV = getInputValue(0);

  size_t batchSize = inV->getHeight();
  size_t dataDim = inV->getWidth();
  CHECK_EQ(getSize(), dataDim);
  // Input width must factor exactly into channels x spatial positions.
  CHECK_EQ(dataDim % channels_, 0UL);

  reserveOutput(batchSize, dataDim);
  MatrixPtr outV = getOutputValue();
  size_t spatialDim = dataDim / channels_;

  Matrix::resizeOrCreate(dataBuffer_, batchSize, dataDim, false, useGpu_);
  Matrix::resizeOrCreate(spatialBuffer_, 1, spatialDim, false, useGpu_);
  Matrix::resizeOrCreate(normBuffer_, batchSize, spatialDim, false, useGpu_);

  // dataBuffer_ = inV .^ 2 (element-wise), summed per position below.
  inV->square2(*dataBuffer_);
  for (size_t i = 0; i < batchSize; i++) {
    const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
    const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
    MatrixPtr outVTmp = createSampleMatrix(outV, i, spatialDim);
    MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);

    // Compute the per-position norm: sqrt(sum over channels of x^2 + eps).
    spatialBuffer_->sumCols(*dataTmp, 1, 0);
    // BUGFIX: add eps to the actual divisor BEFORE sqrt. Previously eps was
    // added to normBuffer_ once and then overwritten by copyFrom() below,
    // so divRowVector() could still divide by zero on all-zero positions.
    spatialBuffer_->addScalar(*spatialBuffer_, 1e-6);
    spatialBuffer_->sqrt2(*spatialBuffer_);
    // Cache the (eps-stabilized) norms for backward().
    normTmp->copyFrom(*spatialBuffer_);
    outVTmp->copyFrom(*inVTmp);
    outVTmp->divRowVector(*spatialBuffer_);
    // Scale each channel row by its trainable factor.
    outVTmp->mulColVector(*scale_->getW());
  }
}
74+
75+
void CrossChannelNormLayer::backward(const UpdateCallback& callback) {
76+
MatrixPtr inG = getInputGrad(0);
77+
MatrixPtr inV = getInputValue(0);
78+
MatrixPtr outG = getOutputGrad();
79+
MatrixPtr outV = getOutputValue();
80+
81+
size_t batchSize = inG->getHeight();
82+
size_t dataDim = inG->getWidth();
83+
size_t spatialDim = dataDim / channels_;
84+
85+
dataBuffer_->dotMul(*outG, *outV);
86+
Matrix::resizeOrCreate(scaleDiff_, channels_, 1, false, useGpu_);
87+
Matrix::resizeOrCreate(channelBuffer_, channels_, 1, false, useGpu_);
88+
Matrix::resizeOrCreate(sampleBuffer_, channels_, spatialDim, false, useGpu_);
89+
scaleDiff_->zeroMem();
90+
for (size_t i = 0; i < batchSize; i++) {
91+
MatrixPtr outGTmp = createSampleMatrix(outG, i, spatialDim);
92+
const MatrixPtr dataTmp = createSampleMatrix(dataBuffer_, i, spatialDim);
93+
const MatrixPtr inVTmp = createSampleMatrix(inV, i, spatialDim);
94+
const MatrixPtr inGTmp = createSampleMatrix(inG, i, spatialDim);
95+
const MatrixPtr normTmp = createSpatialMatrix(normBuffer_, i, spatialDim);
96+
97+
channelBuffer_->sumRows(*dataTmp, 1, 0);
98+
channelBuffer_->dotDiv(*channelBuffer_, *(scale_->getW()));
99+
// store a / scale[i] in scaleDiff_ temporary
100+
scaleDiff_->add(*channelBuffer_, 1.);
101+
102+
sampleBuffer_->dotMul(*inVTmp, *outGTmp);
103+
spatialBuffer_->sumCols(*sampleBuffer_, 1., 1.);
104+
// scale the grad
105+
inGTmp->copyFrom(*inVTmp);
106+
inGTmp->mulRowVector(*spatialBuffer_);
107+
// divide by square of norm
108+
spatialBuffer_->dotMul(*normTmp, *normTmp);
109+
inGTmp->divRowVector(*spatialBuffer_);
110+
// subtract
111+
inGTmp->add(*outGTmp, -1, 1);
112+
// divide by norm
113+
inGTmp->divRowVector(*normTmp);
114+
// scale the diff
115+
inGTmp->mulColVector(*scale_->getW());
116+
}
117+
// updata scale
118+
if (scale_->getWGrad()) scale_->getWGrad()->copyFrom(*scaleDiff_);
119+
scale_->getParameterPtr()->incUpdate(callback);
120+
}
121+
122+
} // namespace paddle

paddle/gserver/layers/NormLayer.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Layer* NormLayer::create(const LayerConfig& config) {
2626
return new ResponseNormLayer(config);
2727
} else if (norm == "cmrnorm-projection") {
2828
return new CMRProjectionNormLayer(config);
29+
} else if (norm == "cross-channel-norm") {
30+
return new CrossChannelNormLayer(config);
2931
} else {
3032
LOG(FATAL) << "Unknown norm type: " << norm;
3133
return nullptr;
@@ -54,4 +56,14 @@ bool ResponseNormLayer::init(const LayerMap& layerMap,
5456
return true;
5557
}
5658

59+
bool CrossChannelNormLayer::init(const LayerMap& layerMap,
60+
const ParameterMap& parameterMap) {
61+
Layer::init(layerMap, parameterMap);
62+
CHECK(parameters_[0]);
63+
const NormConfig& conf = config_.inputs(0).norm_conf();
64+
channels_ = conf.channels();
65+
scale_.reset(new Weight(channels_, 1, parameters_[0]));
66+
return true;
67+
}
68+
5769
} // namespace paddle

paddle/gserver/layers/NormLayer.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,35 @@ class ResponseNormLayer : public NormLayer {
6565
}
6666
};
6767

68+
/**
 * This layer applies normalization across the channels of each sample to a
 * conv layer's output, and scales the output by a group of trainable
 * factors whose dimensions equal to the number of channels.
 * - Input: One and only one input layer is accepted.
 * - Output: The normalized data of the input data.
 * Reference:
 *    Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
 *    Cheng-Yang Fu, Alexander C. Berg. SSD: Single Shot MultiBox Detector
 */
class CrossChannelNormLayer : public NormLayer {
public:
  explicit CrossChannelNormLayer(const LayerConfig& config)
      : NormLayer(config) {}
  bool init(const LayerMap& layerMap, const ParameterMap& parameterMap);
  void forward(PassType passType);
  void backward(const UpdateCallback& callback);
  // Non-owning (channels_ x spatialDim) view of sample `iter` in `data`.
  MatrixPtr createSampleMatrix(MatrixPtr data, size_t iter, size_t spatialDim);
  // Non-owning (1 x spatialDim) view of row `iter` in `data`.
  MatrixPtr createSpatialMatrix(MatrixPtr data, size_t iter, size_t spatialDim);

protected:
  size_t channels_;                // number of channels per sample
  std::unique_ptr<Weight> scale_;  // trainable per-channel scale, (channels_ x 1)
  MatrixPtr scaleDiff_;            // accumulated gradient for scale_
  MatrixPtr normBuffer_;           // per-position norms cached by forward()
  MatrixPtr dataBuffer_;           // element-wise squares / grad products
  MatrixPtr channelBuffer_;        // per-channel scratch, (channels_ x 1)
  MatrixPtr spatialBuffer_;        // per-position scratch, (1 x spatialDim)
  MatrixPtr sampleBuffer_;         // per-sample scratch, (channels_ x spatialDim)
};
98+
6899
} // namespace paddle

paddle/gserver/layers/PriorBox.cpp

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ namespace paddle {
2020
/**
2121
* @brief A layer for generating priorbox locations and variances.
2222
* - Input: Two and only two input layer are accepted. The input layer must be
23-
* be a data output layer and a convolution output layer.
23+
* be a data output layer and a convolution output layer.
2424
* - Output: The priorbox locations and variances of the input data.
2525
* Reference:
2626
* Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
@@ -45,27 +45,32 @@ class PriorBoxLayer : public Layer {
4545
MatrixPtr buffer_;
4646
};
4747

48+
REGISTER_LAYER(priorbox, PriorBoxLayer);
49+
4850
bool PriorBoxLayer::init(const LayerMap& layerMap,
4951
const ParameterMap& parameterMap) {
5052
Layer::init(layerMap, parameterMap);
5153
auto pbConf = config_.inputs(0).priorbox_conf();
54+
std::vector<real> tmp;
55+
aspectRatio_.push_back(1.);
5256
std::copy(pbConf.min_size().begin(),
5357
pbConf.min_size().end(),
5458
std::back_inserter(minSize_));
5559
std::copy(pbConf.max_size().begin(),
5660
pbConf.max_size().end(),
5761
std::back_inserter(maxSize_));
58-
std::copy(pbConf.aspect_ratio().begin(),
59-
pbConf.aspect_ratio().end(),
60-
std::back_inserter(aspectRatio_));
6162
std::copy(pbConf.variance().begin(),
6263
pbConf.variance().end(),
6364
std::back_inserter(variance_));
65+
std::copy(pbConf.aspect_ratio().begin(),
66+
pbConf.aspect_ratio().end(),
67+
std::back_inserter(tmp));
6468
// flip
65-
int inputRatioLength = aspectRatio_.size();
66-
for (int index = 0; index < inputRatioLength; index++)
67-
aspectRatio_.push_back(1 / aspectRatio_[index]);
68-
aspectRatio_.push_back(1.);
69+
int inputRatioLength = tmp.size();
70+
for (int index = 0; index < inputRatioLength; index++) {
71+
aspectRatio_.push_back(tmp[index]);
72+
aspectRatio_.push_back(1 / tmp[index]);
73+
}
6974
numPriors_ = aspectRatio_.size();
7075
if (maxSize_.size() > 0) numPriors_++;
7176
return true;
@@ -94,12 +99,12 @@ void PriorBoxLayer::forward(PassType passType) {
9499
for (int w = 0; w < layerWidth; ++w) {
95100
real centerX = (w + 0.5) * stepW;
96101
real centerY = (h + 0.5) * stepH;
97-
int minSize = 0;
102+
real minSize = 0;
98103
for (size_t s = 0; s < minSize_.size(); s++) {
99104
// first prior.
100105
minSize = minSize_[s];
101-
int boxWidth = minSize;
102-
int boxHeight = minSize;
106+
real boxWidth = minSize;
107+
real boxHeight = minSize;
103108
// xmin, ymin, xmax, ymax.
104109
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
105110
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
@@ -112,7 +117,7 @@ void PriorBoxLayer::forward(PassType passType) {
112117
CHECK_EQ(minSize_.size(), maxSize_.size());
113118
// second prior.
114119
for (size_t s = 0; s < maxSize_.size(); s++) {
115-
int maxSize = maxSize_[s];
120+
real maxSize = maxSize_[s];
116121
boxWidth = boxHeight = sqrt(minSize * maxSize);
117122
tmpPtr[idx++] = (centerX - boxWidth / 2.) / imageWidth;
118123
tmpPtr[idx++] = (centerY - boxHeight / 2.) / imageHeight;
@@ -145,6 +150,5 @@ void PriorBoxLayer::forward(PassType passType) {
145150
MatrixPtr outV = getOutputValue();
146151
outV->copyFrom(buffer_->data_, dim * 2);
147152
}
148-
REGISTER_LAYER(priorbox, PriorBoxLayer);
149153

150154
} // namespace paddle

paddle/gserver/tests/test_LayerGrad.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1642,6 +1642,25 @@ TEST(Layer, PadLayer) {
16421642
}
16431643
}
16441644

1645+
// Gradient check for the "cross-channel-norm" variant of the norm layer:
// input width 100 = 10 channels x 10 spatial positions, with a parameter
// of size 10 (one trainable scale per channel).
TEST(Layer, CrossChannelNormLayer) {
  TestConfig config;
  config.layerConfig.set_type("norm");
  config.layerConfig.set_size(100);
  LayerInputConfig* input = config.layerConfig.add_inputs();
  NormConfig* norm = input->mutable_norm_conf();
  norm->set_norm_type("cross-channel-norm");
  norm->set_channels(10);
  norm->set_size(100);
  // scale/pow/blocked belong to the response-norm variants; placeholders
  // here — presumably ignored by cross-channel-norm (TODO confirm).
  norm->set_scale(0);
  norm->set_pow(0);
  norm->set_blocked(0);
  // {type, name, layer size, parameter size (= channels)}.
  config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10});

  // Run the numeric gradient check on both CPU and GPU builds.
  for (auto useGpu : {false, true}) {
    testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false, 5);
  }
}
1663+
16451664
TEST(Layer, smooth_l1) {
16461665
TestConfig config;
16471666
config.layerConfig.set_type("smooth_l1");

paddle/math/BaseMatrix.cu

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,6 +1453,24 @@ void BaseMatrixT<T>::divRowVector(BaseMatrixT& b) {
14531453
true_type() /* bAsRowVector */, false_type());
14541454
}
14551455

1456+
// Element-wise multiply every column of this matrix by the column vector b:
// this[i][j] *= b[i][0]. b is broadcast across columns (bAsColVector), so
// b must have height_ rows.
template<class T>
void BaseMatrixT<T>::mulColVector(BaseMatrixT& b) {
  MatrixOffset offset(0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyBinary(binary::DotMul<T>(), b, numRows, numCols, offset,
              false_type(), true_type() /* bAsColVector */);
}
1464+
1465+
// Element-wise divide every column of this matrix by the column vector b:
// this[i][j] /= b[i][0]. b is broadcast across columns (bAsColVector), so
// b must have height_ rows. No zero check is performed on b — callers are
// responsible for ensuring the divisor is nonzero (e.g. by adding eps).
template<class T>
void BaseMatrixT<T>::divColVector(BaseMatrixT& b) {
  MatrixOffset offset(0, 0, 0, 0);
  int numRows = height_;
  int numCols = width_;
  applyBinary(binary::DotDiv<T>(), b, numRows, numCols, offset,
              false_type(), true_type() /* bAsColVector */);
}
1473+
14561474
template<>
14571475
template <class Agg>
14581476
int BaseMatrixT<real>::applyRow(Agg agg, BaseMatrixT& b) {

paddle/math/BaseMatrix.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -545,6 +545,9 @@ class BaseMatrixT : public TensorExpression<BaseMatrixT<T>, T> {
545545
void mulRowVector(BaseMatrixT& b);
546546
void divRowVector(BaseMatrixT& b);
547547

548+
void mulColVector(BaseMatrixT& b);
549+
void divColVector(BaseMatrixT& b);
550+
548551
void addP2P(BaseMatrixT& b);
549552

550553
/**

paddle/math/tests/test_BaseMatrix.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,8 @@ TEST(BaseMatrix, BaseMatrix) {
110110
compare(&BaseMatrix::addRowVector);
111111
compare(&BaseMatrix::mulRowVector);
112112
compare(&BaseMatrix::divRowVector);
113+
compare(&BaseMatrix::mulColVector);
114+
compare(&BaseMatrix::divColVector);
113115
compare(&BaseMatrix::addP2P);
114116
compare(&BaseMatrix::invSqrt);
115117
}

python/paddle/trainer/config_parser.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1220,9 +1220,11 @@ def parse_image(image, input_layer_name, image_conf):
12201220

12211221
def parse_norm(norm, input_layer_name, norm_conf):
12221222
norm_conf.norm_type = norm.norm_type
1223-
config_assert(norm.norm_type in ['rnorm', 'cmrnorm-projection'],
1224-
"norm-type %s is not in [rnorm, 'cmrnorm-projection']" %
1225-
norm.norm_type)
1223+
config_assert(
1224+
norm.norm_type in
1225+
['rnorm', 'cmrnorm-projection', 'cross-channel-norm'],
1226+
"norm-type %s is not in [rnorm, cmrnorm-projection, cross-channel-norm]"
1227+
% norm.norm_type)
12261228
norm_conf.channels = norm.channels
12271229
norm_conf.size = norm.size
12281230
norm_conf.scale = norm.scale
@@ -1898,6 +1900,9 @@ def __init__(self, name, inputs, **xargs):
18981900
norm_conf)
18991901
self.set_cnn_layer(name, norm_conf.output_y, norm_conf.output_x,
19001902
norm_conf.channels, False)
1903+
if norm_conf.norm_type == "cross-channel-norm":
1904+
self.create_input_parameter(0, norm_conf.channels,
1905+
[norm_conf.channels, 1])
19011906

19021907

19031908
@config_layer('pool')

0 commit comments

Comments
 (0)