
Commit 290a5d7

Merge pull request #5 from ikostrikov/master
Added convolutional layers and a lenet example for MNIST.
2 parents 9c228ca + 6d0d6a7

File tree

6 files changed: +107 -6 lines changed
beacon8/layers/SpatialConvolutionCUDNN.py

Lines changed: 39 additions & 0 deletions

@@ -0,0 +1,39 @@
import theano as _th
import numpy as _np
import theano.sandbox.cuda.dnn as _dnn

from .Module import Module


class SpatialConvolutionCUDNN(Module):
    def __init__(self, n_input_plane, n_output_plane, k_w, k_h, d_w=1, d_h=1, pad_w=0, pad_h=0, with_bias=True):
        Module.__init__(self)
        self.n_input_plane = n_input_plane
        self.n_output_plane = n_output_plane
        self.k_w = k_w
        self.k_h = k_h
        self.d_w = d_w
        self.d_h = d_h
        self.pad_w = pad_w
        self.pad_h = pad_h
        self.with_bias = with_bias
        w_bound = _np.sqrt(4. / ((self.n_input_plane + self.n_output_plane) * self.k_w * self.k_h))

        W = _np.random.uniform(low=-w_bound, high=w_bound, size=(n_output_plane, n_input_plane, k_h, k_w))
        self.weight = _th.shared(W.astype(dtype=_th.config.floatX))
        self.grad_weight = _th.shared((W*0).astype(dtype=_th.config.floatX))

        if self.with_bias:
            self.bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))
            self.grad_bias = _th.shared(_np.zeros(shape=(n_output_plane, ), dtype=_th.config.floatX))

    def symb_forward(self, symb_input):
        conv_output = _dnn.dnn_conv(img=symb_input,
                                    kerns=self.weight,
                                    border_mode=(self.pad_h, self.pad_w),
                                    subsample=(self.d_h, self.d_w))

        if self.with_bias:
            return conv_output + self.bias.dimshuffle('x', 0, 'x', 'x')
        else:
            return conv_output
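
The layer draws its weights uniformly within a Glorot-style bound computed from the input and output plane counts and the kernel area, and wraps Theano's cuDNN convolution. A minimal usage sketch, assuming a CUDA-enabled Theano build with cuDNN; the variable names here are illustrative, not part of the commit:

import theano
import theano.tensor as T

# Illustrative: 1 -> 32 planes, 5x5 kernel, stride 1, padding 2,
# so 28x28 inputs keep their spatial size (28 + 2*2 - 5 + 1 = 28).
conv = SpatialConvolutionCUDNN(1, 32, 5, 5, d_w=1, d_h=1, pad_w=2, pad_h=2)
x = T.tensor4('x')            # (batch, channels, height, width)
y = conv.symb_forward(x)      # symbolic output, same 4D layout
f = theano.function([x], y)   # compile to actually run it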
beacon8/layers/SpatialMaxPoolingCUDNN.py

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
import theano.sandbox.cuda.dnn as _dnn

from .Module import Module


class SpatialMaxPoolingCUDNN(Module):
    def __init__(self, k_w, k_h, d_w=None, d_h=None, pad_w=0, pad_h=0):
        Module.__init__(self)
        self.k_w = k_w
        self.k_h = k_h

        if d_w is None:
            self.d_w = self.k_w
        else:
            self.d_w = d_w

        if d_h is None:
            self.d_h = self.k_h
        else:
            self.d_h = d_h

        self.pad_w = pad_w
        self.pad_h = pad_h

    def symb_forward(self, symb_input):
        return _dnn.dnn_pool(
            img=symb_input,
            ws=(self.k_w, self.k_h),
            stride=(self.d_w, self.d_h),
            mode='max',
            pad=(self.pad_w, self.pad_h)
        )
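
When no stride is given, the window size is reused, giving non-overlapping pooling. A short sketch under the same assumptions as above (illustrative names):

import theano
import theano.tensor as T

# Illustrative: 2x2 max pooling with the default stride of 2,
# halving each spatial dimension (e.g. 28x28 -> 14x14).
pool = SpatialMaxPoolingCUDNN(2, 2)
x = T.tensor4('x')
f = theano.function([x], pool.symb_forward(x))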

beacon8/layers/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -8,3 +8,5 @@
 from .AddConstant import *
 from .Log import *
 from .Reshape import *
+from .SpatialConvolutionCUDNN import *
+from .SpatialMaxPoolingCUDNN import *
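
These re-exports make the new layers reachable from the package root, which is how the MNIST example refers to them. A sketch, assuming the package is imported under the bb8 alias the example scripts use:

import beacon8 as bb8

conv = bb8.SpatialConvolutionCUDNN(1, 32, 5, 5)
pool = bb8.SpatialMaxPoolingCUDNN(2, 2)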

examples/MNIST/model.py

Lines changed: 24 additions & 0 deletions

@@ -16,3 +16,27 @@ def net():
     model.add(bb8.SoftMax())
     return model
 
+
+def lenet():
+    model = bb8.Sequential()
+    model.add(bb8.Reshape(-1, 1, 28, 28))
+    model.add(bb8.SpatialConvolutionCUDNN(1, 32, 5, 5, 1, 1, 2, 2, with_bias=False))
+    model.add(bb8.BatchNormalization(32))
+    model.add(bb8.ReLU())
+    model.add(bb8.SpatialMaxPoolingCUDNN(2, 2))
+
+    model.add(bb8.SpatialConvolutionCUDNN(32, 64, 5, 5, 1, 1, 2, 2, with_bias=False))
+    model.add(bb8.BatchNormalization(64))
+    model.add(bb8.ReLU())
+    model.add(bb8.SpatialMaxPoolingCUDNN(2, 2))
+    model.add(bb8.Reshape(-1, 7*7*64))
+
+    model.add(bb8.Linear(7*7*64, 100, with_bias=False))
+    model.add(bb8.BatchNormalization(100))
+    model.add(bb8.ReLU())
+    model.add(bb8.Dropout(0.5))
+
+    model.add(bb8.Linear(100, 10))
+    model.add(bb8.SoftMax())
+    return model
+
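A quick sanity check of the spatial sizes in lenet, assuming the 5x5 kernels with padding 2 preserve spatial size and each 2x2 pooling halves it:

# Illustrative shape walk-through for a flat MNIST batch of N images:
# Reshape          -> (N,  1, 28, 28)
# Conv 5x5, pad 2  -> (N, 32, 28, 28)   (28 + 2*2 - 5 + 1 = 28)
# MaxPool 2x2      -> (N, 32, 14, 14)
# Conv 5x5, pad 2  -> (N, 64, 14, 14)
# MaxPool 2x2      -> (N, 64,  7,  7)
# Reshape          -> (N, 7*7*64) = (N, 3136), matching the first Linear layer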

examples/MNIST/run.py

Lines changed: 3 additions & 2 deletions

@@ -10,7 +10,7 @@ def main(params):
     train_set_x, train_set_y = train_set
     test_set_x, test_set_y = test_set
 
-    model = net()
+    model = lenet()
 
     criterion = bb8.ClassNLLCriterion()
 
@@ -19,13 +19,14 @@ def main(params):
     for epoch in range(100):
         model.training()
         train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'])
+        train(train_set_x, train_set_y, model, optimiser, criterion, epoch, params['batch_size'], 'stat')
 
         model.evaluate()
         validate(test_set_x, test_set_y, model, epoch, params['batch_size'])
 
 
 if __name__ == "__main__":
     params = {}
-    params['lr'] = 0.001
+    params['lr'] = 0.1
     params['batch_size'] = 64
     main(params)
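
Two things change here: the example now builds the batch-normalised lenet, and each epoch runs a second pass over the training data with the extra 'stat' argument, which (see train.py below) only accumulates statistics instead of updating parameters. The learning rate also jumps from 0.001 to 0.1, plausibly because the batch-normalised network tolerates a much larger step size.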

examples/MNIST/train.py

Lines changed: 7 additions & 4 deletions

@@ -3,7 +3,7 @@
 import theano as _th
 
 
-def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size):
+def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size, mode=None):
     progress = make_progressbar('Training', epoch, len(dataset_x))
     progress.start()
 
@@ -17,9 +17,12 @@ def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size):
             mini_batch_input[k] = dataset_x[shuffle[j * batch_size + k]]
             mini_batch_targets[k] = dataset_y[shuffle[j * batch_size + k]]
 
-        model.zero_grad_parameters()
-        model.accumulate_gradients(mini_batch_input, mini_batch_targets, criterion)
-        optimiser.update_parameters(model)
+        if mode is None:
+            model.zero_grad_parameters()
+            model.accumulate_gradients(mini_batch_input, mini_batch_targets, criterion)
+            optimiser.update_parameters(model)
+        else:
+            model.accumulate_statistics(mini_batch_input)
 
         progress.update(j * batch_size)
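
With the new mode argument the same loop serves two purposes: the default performs the usual zero-grad / accumulate-gradients / update step, while any non-None mode (run.py passes 'stat') skips the optimiser and only calls model.accumulate_statistics, presumably so BatchNormalization layers can gather activation statistics over the training set before evaluation. A sketch of the two call styles, using the names from the example scripts:

# Gradient pass: updates the model parameters.
train(train_set_x, train_set_y, model, optimiser, criterion, epoch, batch_size)

# Statistics pass: no parameter updates, only model.accumulate_statistics on
# each mini-batch (the value 'stat' itself is not inspected; any non-None
# value behaves the same).
train(train_set_x, train_set_y, model, optimiser, criterion, epoch, batch_size, 'stat')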
