Skip to content

Commit 043b16e

Browse files
committed
updated batchnorm
1 parent 32dcfcd commit 043b16e

File tree

2 files changed

+36
-57
lines changed

2 files changed

+36
-57
lines changed

mozi/layers/normalization.py

Lines changed: 29 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,75 +3,56 @@
33
from mozi.utils.theano_utils import shared_zeros
44
from mozi.weight_init import UniformWeight
55
import theano.tensor as T
6+
import theano
7+
floatX = theano.config.floatX
68

79
class BatchNormalization(Template):
8-
'''
9-
Adapted From keras
10-
REFERENCE:
11-
Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift
12-
http://arxiv.org/pdf/1502.03167v3.pdf
13-
14-
mode: 0 -> featurewise normalization
15-
1 -> samplewise normalization (may sometimes outperform featurewise mode)
16-
17-
momentum: momentum term in the computation of a running estimate of the mean and std of the data
18-
'''
19-
def __init__(self, input_shape, epsilon=1e-6, mode=0, gamma_init=None, memory=0.9):
    '''
    Batch-normalization layer.

    REFERENCE:
        Batch Normalization: Accelerating Deep Network Training by
        Reducing Internal Covariate Shift
        http://arxiv.org/pdf/1502.03167v3.pdf

    PARAMS:
        input_shape: shape of the learnable scale (gamma) and shift (beta)
            parameters; one entry per normalized feature.
        epsilon: denominator min value for preventing division by zero
            in computing std.
        mode: accepted for backward compatibility with the previous
            featurewise/samplewise API; no longer stored or used.
        gamma_init: weight initializer called as gamma_init(shape, name=...)
            to build gamma. Defaults to a fresh UniformWeight() per layer
            (a shared default instance evaluated at import time is avoided
            deliberately).
        memory: y_t is the latest value, the moving average x_tp1 is
            calculated as x_tp1 = memory * y_t + (1-memory) * x_t; the
            larger the memory, the more weight is put on the contemporary
            value.
    '''
    if gamma_init is None:
        gamma_init = UniformWeight()

    self.input_shape = input_shape
    self.epsilon = epsilon
    self.mem = memory

    # Learnable affine parameters applied after normalization.
    self.gamma = gamma_init(self.input_shape, name='gamma')
    self.beta = shared_zeros(self.input_shape, name='beta')

    # Running estimates used at test time; start at 0 and are updated
    # by _train_fprop as an exponential moving average.
    self.moving_mean = 0
    self.moving_std = 0

    self.params = [self.gamma, self.beta]
3335

3436

3537
def _train_fprop(self, state_below):
38+
miu = state_below.mean(axis=0)
39+
std = T.std(state_below, axis=0)
40+
Z = (state_below - miu) / (std + self.epsilon)
3641

37-
if self.mode == 0:
38-
m = state_below.mean(axis=0)
39-
# manual computation of std to prevent NaNs
40-
std = T.mean((state_below-m)**2 + self.epsilon, axis=0) ** 0.5
41-
X_normed = (state_below - m) / (std + self.epsilon)
42-
43-
if self.running_mean is None:
44-
self.running_mean = m
45-
self.running_std = std
46-
else:
47-
self.running_mean *= self.momentum
48-
self.running_mean += (1-self.momentum) * m
49-
self.running_std *= self.momentum
50-
self.running_std += (1-self.momentum) * std
51-
52-
elif self.mode == 1:
53-
m = state_below.mean(axis=-1, keepdims=True)
54-
std = state_below.std(axis=-1, keepdims=True)
55-
X_normed = (state_below - m) / (std + self.epsilon)
42+
self.moving_mean += self.mem * miu + (1-self.mem) * self.moving_mean
43+
self.moving_std += self.mem * std + (1-self.mem) * self.moving_std
5644

57-
return self.gamma * X_normed + self.beta
45+
return self.gamma * Z + self.beta
5846

5947

6048
def _test_fprop(self, state_below):
61-
62-
if self.mode == 0:
63-
X_normed = (state_below - self.running_mean) / (self.running_std + self.epsilon)
64-
65-
elif self.mode == 1:
66-
m = state_below.mean(axis=-1, keepdims=True)
67-
std = state_below.std(axis=-1, keepdims=True)
68-
X_normed = (state_below - m) / (std + self.epsilon)
69-
70-
return self.gamma * X_normed + self.beta
49+
Z = (state_below - self.moving_mean) / (self.moving_std + self.epsilon)
50+
return self.gamma * Z + self.beta
7151

7252

7353
class LRN(Template):
7454
"""
55+
Adapted from pylearn2
7556
Local Response Normalization
7657
"""
7758

mozi/weight_init.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,20 +24,18 @@ def __init__(self, mean=0, std=0.1):
2424
self.std = std
2525

2626
def __call__(self, dim, name='W'):
    '''
    Build a Theano shared variable of shape `dim`, sampled from a normal
    distribution with this initializer's mean and std, cast to floatX.

    PARAMS:
        dim: shape of the weight tensor to create.
        name: name given to the shared variable.
    '''
    # astype(floatX) keeps the values in Theano's configured float precision.
    W_values = np.random.normal(loc=self.mean, scale=self.std, size=dim).astype(floatX)
    return theano.shared(name=name, value=W_values, borrow=True)
3130

3231

3332
class XavierUniformWeight(WeightInitialization):
    '''
    Glorot/Xavier uniform initializer: samples from
    U(-limit, limit) with limit = 4 * sqrt(6 / (fan_in + fan_out)).
    '''

    def __call__(self, dim, name='W'):
        '''
        Build a Theano shared variable of shape `dim` with Xavier-uniform
        values, cast to floatX.
        '''
        fan_in, fan_out = get_fans(dim)
        # Hoist the bound so it is computed once instead of twice.
        limit = 4 * np.sqrt(6. / (fan_in + fan_out))
        W_values = np.random.uniform(low=-limit, high=limit, size=dim).astype(floatX)
        return theano.shared(name=name, value=W_values, borrow=True)
4341

@@ -47,5 +45,5 @@ def __init__(self, scale=0.05):
4745
self.scale = scale
4846

4947
def __call__(self, dim, name='W'):
    '''
    Build a Theano shared variable of shape `dim`, sampled from
    U(-scale, scale), cast to floatX.

    PARAMS:
        dim: shape of the weight tensor to create.
        name: name given to the shared variable.
    '''
    # astype(floatX) matches the other initializers and avoids float64
    # upcasting when Theano is configured for float32.
    W_values = np.random.uniform(low=-self.scale, high=self.scale, size=dim).astype(floatX)
    return theano.shared(name=name, value=W_values, borrow=True)

0 commit comments

Comments
 (0)