Skip to content

Commit 8a49f7f

Browse files
Add configurable epsilon parameter to batch normalization (bn) layers
1 parent 08bc08d commit 8a49f7f

File tree

11 files changed

+33
-15
lines changed

11 files changed

+33
-15
lines changed

paddle/gserver/layers/BatchNormBaseLayer.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
4141
useGlobalStats_ = config_.use_global_stats();
4242
}
4343
movingAvgFraction_ = config_.moving_average_fraction();
44+
EPS = config_.epsilon();
4445

4546
weight_.reset(new Weight(1, channels_, parameters_[0]));
4647
movingMean_.reset(new Weight(1, channels_, parameters_[1]));

paddle/gserver/layers/BatchNormBaseLayer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ class BatchNormBaseLayer : public Layer {
9494
bool useGlobalStats_;
9595
// used to compute the moving mean and variance.
9696
real movingAvgFraction_;
97+
// Epsilon value used in the batch normalization formula.
98+
real EPS;
9799
};
98100

99101
} // namespace paddle

paddle/gserver/layers/BatchNormalizationLayer.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ namespace paddle {
2222

2323
REGISTER_LAYER(batch_norm, BatchNormalizationLayer);
2424

25-
const real BatchNormalizationLayer::EPS = 1E-5;
26-
2725
bool BatchNormalizationLayer::init(const LayerMap& layerMap,
2826
const ParameterMap& parameterMap) {
2927
/* Initialize the basic parent class */

paddle/gserver/layers/BatchNormalizationLayer.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,6 @@ class BatchNormalizationLayer : public BatchNormBaseLayer {
3939
void backward(const UpdateCallback& callback = nullptr) override;
4040

4141
protected:
42-
/// Epsilon value used in the batch normalization formula.
43-
static const real EPS;
44-
4542
/// Load pre-calculated mean and std.
4643
void setMeanAndStd();
4744

paddle/gserver/layers/CudnnBatchNormLayer.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace paddle {
2121

2222
REGISTER_LAYER(cudnn_batch_norm, CudnnBatchNormLayer);
2323

24-
const double CudnnBatchNormLayer::EPS = 1E-5;
24+
const double CudnnBatchNormLayer::MIN_EPS = 1E-5;
2525

2626
bool CudnnBatchNormLayer::init(const LayerMap& layerMap,
2727
const ParameterMap& parameterMap) {
@@ -60,6 +60,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
6060
real* beta = biases_->getW()->getData();
6161
real* movingMean = movingMean_->getW()->getData();
6262
real* movingVar = movingVar_->getW()->getData();
63+
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
6364

6465
if (!useGlobalStats_) {
6566
REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str());
@@ -75,7 +76,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
7576
1.0 - movingAvgFraction_,
7677
movingMean,
7778
movingVar,
78-
EPS,
79+
EPS_,
7980
savedMean,
8081
savedInvVar);
8182
} else {
@@ -90,7 +91,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
9091
beta,
9192
movingMean,
9293
movingVar,
93-
EPS);
94+
EPS_);
9495
} else {
9596
// There is a limitation in cudnn library.
9697
// When the batch size is larger than 1024 in cuDNN v5.1,
@@ -101,7 +102,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
101102
beta,
102103
movingMean,
103104
movingVar,
104-
EPS,
105+
EPS_,
105106
batchSize,
106107
channels_,
107108
imageH_ * imageD_,
@@ -127,6 +128,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
127128
real* gamma = weight_->getW()->getData();
128129
real* savedMean = savedMean_->getData();
129130
real* savedInvVar = savedInvVar_->getData();
131+
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
130132

131133
auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) {
132134
Matrix::resizeOrCreate(m, h, w, false, true);
@@ -157,7 +159,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
157159
gamma,
158160
gammaGrad,
159161
betaGrad,
160-
EPS,
162+
EPS_,
161163
savedMean,
162164
savedInvVar);
163165

paddle/gserver/layers/CudnnBatchNormLayer.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,14 @@ class CudnnBatchNormLayer : public BatchNormBaseLayer {
4747

4848
protected:
4949
/**
50-
* Epsilon value used in the batch normalization formula.
5150
* Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
5251
* Same epsilon value should be used in forward and backward functions.
5352
*/
54-
static const double EPS;
53+
static const double MIN_EPS;
54+
55+
/// Epsilon value used in the batch normalization formula.
56+
/// If EPS_ is smaller than MIN_EPS, MIN_EPS will be used.
57+
double EPS_;
5558

5659
/// Input/output tensor descriptor desc
5760
hl_tensor_descriptor ioDesc_;

paddle/gserver/layers/MKLDNNBatchNormLayer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ namespace paddle {
2121

2222
REGISTER_LAYER(mkldnn_batch_norm, MKLDNNBatchNormLayer);
2323

24-
const real MKLDNNBatchNormLayer::EPS = 1E-5;
25-
2624
bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
2725
const ParameterMap& parameterMap) {
2826
if (!MKLDNNLayer::init(layerMap, parameterMap)) {
@@ -50,6 +48,8 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
5048
useGlobalStats_ = config_.use_global_stats();
5149
}
5250
movingAvgFraction_ = config_.moving_average_fraction();
51+
EPS = config_.epsilon();
52+
5353
VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use")
5454
<< " --- global stats";
5555
VLOG(MKLDNN_BASE) << "Moving average fraction: " << movingAvgFraction_;

paddle/gserver/layers/MKLDNNBatchNormLayer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ class MKLDNNBatchNormLayer : public MKLDNNLayer {
3232
std::shared_ptr<bn_fwd::primitive_desc> fwdPD_;
3333

3434
// Epsilon value used in the batch normalization formula.
35-
static const real EPS;
35+
real EPS;
36+
3637
// weight and bias in paddle
3738
std::unique_ptr<Weight> weight_;
3839
std::unique_ptr<Weight> biases_;

proto/ModelConfig.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,10 @@ message LayerConfig {
540540

541541
// for switch order layer
542542
optional ReshapeConfig reshape_conf = 59;
543+
544+
// for batch normalization layer
545+
// Small constant added to the variance to avoid numerical problems.
546+
optional double epsilon = 60 [ default = 0.00001 ];
543547
}
544548

545549
message EvaluatorConfig {

python/paddle/trainer/config_parser.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2434,6 +2434,7 @@ def __init__(self,
24342434
bias=True,
24352435
img3D=False,
24362436
use_global_stats=True,
2437+
epsilon=1e-5,
24372438
moving_average_fraction=0.9,
24382439
batch_norm_type=None,
24392440
mean_var_names=None,
@@ -2482,6 +2483,8 @@ def __init__(self,
24822483
self.config.use_global_stats = use_global_stats
24832484
if moving_average_fraction is not None:
24842485
self.config.moving_average_fraction = moving_average_fraction
2486+
if epsilon is not None:
2487+
self.config.epsilon = epsilon
24852488

24862489
input_layer = self.get_input_layer(0)
24872490
image_conf = self.config.inputs[0].image_conf

0 commit comments

Comments
 (0)