
Commit 2e54321

Remove reset and rework inits.
This gets rid of `reset` as discussed in #15 and makes parameter initialization more modular. (More inits to follow.)
1 parent 02c8d47 commit 2e54321
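
Not part of the diff, but for orientation: after this commit an initializer is simply a callable taking (shape, fan) and returning a NumPy array, so layers can be handed any such function. A minimal sketch of that contract (`my_init` is a hypothetical example, not from this commit):

    import numpy as np

    def my_init(shape, fan):
        # `fan` may be None; inits that need it (e.g. xavier) assert on it.
        return np.random.uniform(-0.01, 0.01, size=shape)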

10 files changed: 66 additions & 48 deletions


beacon8/containers/Container.py

Lines changed: 1 addition & 1 deletion
@@ -37,4 +37,4 @@ def get_stat_updates(self):
         return stat_updates
 
     def add(self, module):
-        self.modules.append(module)
+        self.modules.append(module)

beacon8/init/Const.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+import numpy as _np
+
+
+def const(value):
+    def init(shape, fan):
+        return _np.full(shape, value)
+    return init
+
+zero = const(0)
+one = const(1)
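
A quick illustration (not part of the diff): `const` builds an init closure that ignores `fan`, and `zero`/`one` are the two ready-made cases. Casting to Theano's floatX happens later, in the create_param helpers:

    from beacon8.init import const, zero, one

    zero((3,), None)         # array of zeros, shape (3,)
    one((2, 2), None)        # 2x2 array of ones
    const(0.1)((4,), None)   # array of shape (4,) filled with 0.1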

beacon8/init/Xavier.py

Lines changed: 7 additions & 0 deletions
@@ -0,0 +1,7 @@
+import numpy as _np
+
+def xavier(shape, fan):
+    assert fan is not None, "The parameter's `fan` needs to be specified when using Xavier initialization."
+
+    w_bound = _np.sqrt(4. / sum(fan))
+    return _np.random.uniform(low=-w_bound, high=w_bound, size=shape)
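
For reference (not from the diff): with fan=(fan_in, fan_out) the bound is sqrt(4 / (fan_in + fan_out)), the same formula the old hard-coded Linear.reset used. A rough usage sketch:

    from beacon8.init import xavier

    W = xavier((784, 100), fan=(784, 100))   # uniform in [-b, b], b = sqrt(4/884) ~ 0.067
    xavier((784, 100), None)                 # AssertionError: fan must be specified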

beacon8/init/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
+from .Const import const, zero, one
+from .Xavier import xavier

beacon8/layers/BatchNormalization.py

Lines changed: 10 additions & 9 deletions
@@ -1,4 +1,6 @@
 from .Module import Module
+from beacon8.init import zero, one
+from beacon8.utils import create_param, create_param_and_grad
 
 import numpy as _np
 import theano as _th
@@ -9,17 +11,16 @@ class BatchNormalization(Module):
     def __init__(self, n_features, eps=None):
         Module.__init__(self)
 
-        self.weight = _th.shared(_np.ones(shape=(n_features,), dtype=_th.config.floatX))
-        self.bias = _th.shared(_np.zeros(shape=(n_features, ), dtype=_th.config.floatX))
-        self.grad_weight = _th.shared(_np.zeros(shape=(n_features,), dtype=_th.config.floatX))
-        self.grad_bias = _th.shared(_np.zeros(shape=(n_features, ), dtype=_th.config.floatX))
+        self.weight, self.grad_weight = create_param_and_grad(n_features, one, 'W_BN')
+        self.bias, self.grad_bias = create_param_and_grad(n_features, zero, 'b_BN')
 
-        self.inference_weight = _th.shared(_np.ones(shape=(n_features,), dtype=_th.config.floatX))
-        self.inference_bias = _th.shared(_np.zeros(shape=(n_features, ), dtype=_th.config.floatX))
+        self.inference_weight = create_param(n_features, one, 'W_BN_inf')
+        self.inference_bias = create_param(n_features, zero, 'b_BN_inf')
 
-        self.buffer_variance = _th.shared(_np.ones(shape=(n_features, ), dtype=_th.config.floatX))
-        self.buffer_mean = _th.shared(_np.zeros(shape=(n_features, ), dtype=_th.config.floatX))
-        self.buffer_counts = _th.shared(_np.asarray(0., dtype=_th.config.floatX))
+        # These are buffers for collecting the minibatch statistics.
+        self.buffer_variance = create_param(n_features, one, 'BN_var')
+        self.buffer_mean = create_param(n_features, zero, 'BN_mean')
+        self.buffer_counts = _th.shared(_np.asarray(0, dtype=_th.config.floatX))
 
         self.eps = eps or 1e-5
 
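
A small sanity check, illustrative only (the import path assumes the module file shown above): every per-feature parameter and buffer now comes out of the create_param helpers with shape (n_features,):

    from beacon8.layers.BatchNormalization import BatchNormalization

    bn = BatchNormalization(64)
    bn.weight.get_value().shape        # (64,), initialized to ones
    bn.buffer_mean.get_value().shape   # (64,), initialized to zeros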

beacon8/layers/Linear.py

Lines changed: 5 additions & 17 deletions
@@ -1,35 +1,23 @@
 from .Module import Module
+from beacon8.init import zero, xavier
+from beacon8.utils import create_param_and_grad
 
 import numpy as _np
 import theano as _th
 
 
 class Linear(Module):
 
-    def __init__(self, nin, nout, init='Xavier', with_bias=True):
+    def __init__(self, nin, nout, init=xavier, with_bias=True, init_b=zero):
         Module.__init__(self)
 
         self.nin = nin
         self.nout = nout
-        self.init = init
         self.with_bias = with_bias
 
-        self.reset()
-
-    def reset(self):
-        if self.init == 'Xavier':
-            w_bound = _np.sqrt(4. / (self.nin + self.nout))
-            W = _np.random.uniform(low=-w_bound, high=w_bound,
-                                   size=(self.nin, self.nout))
-        else:
-            raise NotImplementedError
-
-        self.weight = _th.shared(W.astype(_th.config.floatX))
-        self.grad_weight = _th.shared((W*0.).astype(_th.config.floatX))
-
+        self.weight, self.grad_weight = create_param_and_grad((nin, nout), init, fan=(nin, nout), name='Wlin_{}x{}'.format(nin, nout))
         if self.with_bias:
-            self.bias = _th.shared(_np.zeros(shape=self.nout, dtype=_th.config.floatX))
-            self.grad_bias = _th.shared(_np.zeros(shape=self.nout, dtype=_th.config.floatX))
+            self.bias, self.grad_bias = create_param_and_grad(nout, init_b, name='blin_{}'.format(nout))
 
     def symb_forward(self, symb_input):
         out = _th.tensor.dot(symb_input, self.weight)
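
What the rework buys (illustration only, not in the diff): callers can now pass any initializer instead of the old 'Xavier' string, and the bias init is configurable too:

    from beacon8.layers.Linear import Linear
    from beacon8.init import const

    fc1 = Linear(784, 100)                   # defaults: xavier weights, zero bias
    fc2 = Linear(100, 10, init=const(0.01))  # custom weight init, bias still zero
    fc1.weight.get_value().shape             # (784, 100)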

beacon8/layers/Module.py

Lines changed: 0 additions & 3 deletions
@@ -11,9 +11,6 @@ def __init__(self):
         self.fn_accum_grads = None
         self.fn_accum_stats = None
 
-    def reset(self):
-        pass
-
     #def __hash__(self):
     #    raise NotImplementedError("You *need* to reimplement hash, even if it's just python's default. See the documentation for more info.")
 

beacon8/layers/SpatialConvolution.py

Lines changed: 8 additions & 9 deletions
@@ -1,11 +1,13 @@
 from .Module import Module
+from beacon8.init import zero, xavier
+from beacon8.utils import create_param_and_grad
 
 import theano as _th
 import numpy as _np
 
 
 class SpatialConvolution(Module):
-    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, with_bias=True, border_mode='valid', imshape=None):
+    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, with_bias=True, border_mode='valid', imshape=None, init=xavier, init_b=zero):
         Module.__init__(self)
         self.n_input_plane = n_input_plane
         self.n_output_plane = n_output_plane
@@ -17,19 +19,17 @@ def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, with_b
         self.border_mode = border_mode
         self.imshape = imshape
 
-        w_bound = _np.sqrt(4. / ((self.n_input_plane + self.n_output_plane) * self.k_w * self.k_h))
-        W = _np.random.uniform(low=-w_bound, high=w_bound, size=(n_output_plane, n_input_plane, k_h, k_w))
-        self.weight = _th.shared(W.astype(dtype=_th.config.floatX))
-        self.grad_weight = _th.shared((W*0).astype(dtype=_th.config.floatX))
+        self.w_shape = (n_output_plane, n_input_plane, k_h, k_w)
+        w_fan = (n_input_plane*k_w*k_h, n_output_plane*k_w*k_h)
 
+        self.weight, self.grad_weight = create_param_and_grad(self.w_shape, init, fan=w_fan, name='Wconv_{},{}@{}x{}'.format(n_input_plane, n_output_plane, k_w, k_h))
         if self.with_bias:
-            self.bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))
-            self.grad_bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))
+            self.bias, self.grad_bias = create_param_and_grad(n_output_plane, init_b, name='bconv_{}'.format(n_output_plane))
 
     def symb_forward(self, symb_input):
         conv_output = _th.tensor.nnet.conv.conv2d(symb_input, self.weight,
                                                   image_shape=(None, self.n_input_plane) + (self.imshape or (None, None)),
-                                                  filter_shape=(self.n_output_plane, self.n_input_plane, self.k_h, self.k_w),
+                                                  filter_shape=self.w_shape,
                                                   border_mode=self.border_mode,
                                                   subsample=(self.d_h, self.d_w)
                                                   )
@@ -38,4 +38,3 @@ def symb_forward(self, symb_input):
             return conv_output + self.bias.dimshuffle('x', 0, 'x', 'x')
         else:
             return conv_output
-
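
Editorial note with a worked check (not in the diff): the new fan convention for convolutions is fan = (n_input_plane*k_w*k_h, n_output_plane*k_w*k_h), which reproduces the old hard-coded bound exactly:

    # hypothetical numbers, illustration only
    n_in, n_out, k_w, k_h = 3, 16, 5, 5
    w_fan = (n_in*k_w*k_h, n_out*k_w*k_h)                 # (75, 400)
    # new xavier bound: sqrt(4 / (75 + 400))   = sqrt(4/475)
    # old formula:      sqrt(4 / ((3+16)*5*5)) = sqrt(4/475)  -- identical, ~0.092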

beacon8/layers/SpatialConvolutionCUDNN.py

Lines changed: 9 additions & 9 deletions
@@ -1,12 +1,14 @@
+from .Module import Module
+from beacon8.init import zero, xavier
+from beacon8.utils import create_param_and_grad
+
 import theano as _th
 import numpy as _np
 import theano.sandbox.cuda.dnn as _dnn
 
-from .Module import Module
-
 
 class SpatialConvolutionCUDNN(Module):
-    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=0, pad_h=0, with_bias=True):
+    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=0, pad_h=0, with_bias=True, init=xavier, init_b=zero):
         Module.__init__(self)
         self.n_input_plane = n_input_plane
         self.n_output_plane = n_output_plane
@@ -17,15 +19,13 @@ def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=
         self.pad_w = pad_w
         self.pad_h = pad_h
         self.with_bias = with_bias
-        w_bound = _np.sqrt(4. / ((self.n_input_plane + self.n_output_plane) * self.k_w * self.k_h))
 
-        W = _np.random.uniform(low=-w_bound, high=w_bound, size=(n_output_plane, n_input_plane, k_h, k_w))
-        self.weight = _th.shared(W.astype(dtype=_th.config.floatX))
-        self.grad_weight = _th.shared((W*0).astype(dtype=_th.config.floatX))
+        w_shape = (n_output_plane, n_input_plane, k_h, k_w)
+        w_fan = (n_input_plane*k_w*k_h, n_output_plane*k_w*k_h)
 
+        self.weight, self.grad_weight = create_param_and_grad(w_shape, init, fan=w_fan, name='Wconv_{},{}@{}x{}'.format(n_input_plane, n_output_plane, k_w, k_h))
         if self.with_bias:
-            self.bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))
-            self.grad_bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))
+            self.bias, self.grad_bias = create_param_and_grad(n_output_plane, init_b, name='bconv_{}'.format(n_output_plane))
 
     def symb_forward(self, symb_input):
         conv_output = _dnn.dnn_conv(img=symb_input,

beacon8/utils.py

Lines changed: 14 additions & 0 deletions
@@ -1,4 +1,18 @@
 import theano as _th
+import numpy as _np
+
+
+def create_param(shape, init, fan=None, name=None, type=_th.config.floatX):
+    return _th.shared(init(shape, fan).astype(type), name=name)
+
+
+def create_param_and_grad(shape, init, fan=None, name=None, type=_th.config.floatX):
+    val = init(shape, fan).astype(type)
+    param = _th.shared(val, name=name)
+    grad_name = 'grad_' + name if name is not None else None
+    grad_param = _th.shared(_np.zeros_like(val), name=grad_name)
+    return param, grad_param
+
 
 def create_param_state_as(other, initial_value=0):
     return _th.shared(other.get_value()*0 + initial_value,
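
A usage sketch of the new helpers (illustrative): both wrap the initialized value in a Theano shared variable, and create_param_and_grad additionally allocates a zero-filled gradient accumulator whose name gets a 'grad_' prefix:

    from beacon8.utils import create_param, create_param_and_grad
    from beacon8.init import xavier, zero

    W, dW = create_param_and_grad((5, 3), xavier, fan=(5, 3), name='W_demo')
    W.get_value().shape    # (5, 3)
    dW.get_value().sum()   # 0.0 -- gradients start at zero
    dW.name                # 'grad_W_demo'

    b = create_param(3, zero, name='b_demo')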
