Skip to content

Commit 490b1cb

Browse files
committed
Add simple Optimizer example based on MNIST.
1 parent 7281fa8 commit 490b1cb

File tree

5 files changed

+184
-0
lines changed

5 files changed

+184
-0
lines changed

examples/Optimizers/mnist.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import os
2+
import gzip
3+
import pickle
4+
import sys
5+
6+
# Python 2/3 compatibility.
7+
try:
8+
from urllib.request import urlretrieve
9+
except ImportError:
10+
from urllib import urlretrieve
11+
12+
13+
'''Adapted from theano tutorial'''
14+
15+
16+
def load_mnist(data_file=os.path.join(os.path.dirname(__file__), 'mnist.pkl.gz')):
    """Load the gzipped, pickled MNIST dataset, downloading it if missing.

    Args:
        data_file: Path of the local ``mnist.pkl.gz`` archive. Defaults to a
            file of that name next to this module. If the path does not
            exist, the archive is downloaded to it first.

    Returns:
        Whatever object is stored in the pickle, unchanged.
    """
    if not os.path.exists(data_file):
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print('Downloading data from {}'.format(origin))

        # Download to a side file and rename only once the transfer is
        # complete; otherwise an interrupted download would leave a truncated
        # archive that passes the existence check above on every later run.
        partial_file = data_file + '.part'
        try:
            urlretrieve(origin, partial_file)
            os.rename(partial_file, data_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)

    print('... loading data')

    with gzip.open(data_file, 'rb') as f:
        # NOTE(security): unpickling executes arbitrary code, and the archive
        # is fetched over plain HTTP. Only use this with a source you trust.
        if sys.version_info[0] >= 3:
            # The archive was pickled under Python 2; latin1 lets Python 3
            # decode its byte strings.
            return pickle.load(f, encoding='latin1')
        return pickle.load(f)

examples/Optimizers/model.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import DeepFried2 as df
2+
3+
4+
def net():
    """Build a fully connected classifier: 28*28 inputs, three hidden
    layers of 100 units with ReLU, and a 10-way SoftMax output."""
    widths = [28*28, 100, 100, 100]

    mlp = df.Sequential()
    # One Linear+ReLU pair per consecutive pair of layer widths.
    for n_in, n_out in zip(widths[:-1], widths[1:]):
        mlp.add(df.Linear(n_in, n_out))
        mlp.add(df.ReLU())

    mlp.add(df.Linear(widths[-1], 10))
    mlp.add(df.SoftMax())
    return mlp
18+
19+
20+
def lenet():
    """Build a LeNet-style network from the CUDNN convolution/pooling modules.

    Two conv -> batch-norm -> ReLU -> max-pool stages are followed by a
    100-unit hidden layer with dropout and a 10-way SoftMax output.
    """
    flat_dim = 7*7*64

    cnn = df.Sequential()
    cnn.add(df.Reshape(-1, 1, 28, 28))

    # Both convolutional stages share the same layout and differ only in
    # their channel counts.
    for n_in, n_out in ((1, 32), (32, 64)):
        cnn.add(df.SpatialConvolutionCUDNN(n_in, n_out, 5, 5, 1, 1, 2, 2, with_bias=False))
        cnn.add(df.BatchNormalization(n_out))
        cnn.add(df.ReLU())
        cnn.add(df.SpatialMaxPoolingCUDNN(2, 2))

    cnn.add(df.Reshape(-1, flat_dim))

    cnn.add(df.Linear(flat_dim, 100, with_bias=False))
    cnn.add(df.BatchNormalization(100))
    cnn.add(df.ReLU())
    cnn.add(df.Dropout(0.5))

    cnn.add(df.Linear(100, 10))
    cnn.add(df.SoftMax())
    return cnn
42+
43+
44+
def lenet2():
    """Build a LeNet-style network from the non-CUDNN convolution/pooling
    modules.

    Two conv -> batch-norm -> ReLU -> max-pool stages are followed by a
    100-unit hidden layer with dropout and a 10-way SoftMax output.
    """
    flat_dim = 4*4*64

    cnn = df.Sequential()
    cnn.add(df.Reshape(-1, 1, 28, 28))

    # Both convolutional stages share the same layout and differ only in
    # their channel counts.
    for n_in, n_out in ((1, 32), (32, 64)):
        cnn.add(df.SpatialConvolution(n_in, n_out, 5, 5, 1, 1, with_bias=False))
        cnn.add(df.BatchNormalization(n_out))
        cnn.add(df.ReLU())
        cnn.add(df.SpatialMaxPooling(2, 2))

    cnn.add(df.Reshape(-1, flat_dim))

    cnn.add(df.Linear(flat_dim, 100, with_bias=False))
    cnn.add(df.BatchNormalization(100))
    cnn.add(df.ReLU())
    cnn.add(df.Dropout(0.5))

    cnn.add(df.Linear(100, 10))
    cnn.add(df.SoftMax())
    return cnn

examples/Optimizers/run.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import DeepFried2 as df
2+
3+
from examples.utils import make_progressbar
4+
5+
from mnist import load_mnist
6+
from train import train
7+
from test import test
8+
from model import net, lenet2
9+
10+
11+
if __name__ == "__main__":
    print("THIS IS JUST AN EXAMPLE.")
    print("Please don't take these numbers as a benchmark.")
    print("While the optimizer's parameters have been grid-searched,")
    print("a fair comparison would run all experiments multiple times AND RUN MORE THAN FIVE EPOCHS.")

    batch_size = 64
    # Single source of truth for both the progress bar length and the loop.
    n_epochs = 5

    (Xtrain, ytrain), (Xval, yval), (Xtest, ytest) = load_mnist()

    criterion = df.ClassNLLCriterion()

    def run(optim):
        """Train a fresh `net()` with `optim` for `n_epochs` epochs, then
        print the training-set NLL and the validation-set error count."""
        progress = make_progressbar('Training with ' + str(optim), n_epochs)
        progress.start()

        model = net()
        model.training()
        for epoch in range(n_epochs):
            # Each epoch is one parameter-update pass followed by one
            # statistics-accumulation pass over the training set.
            train(Xtrain, ytrain, model, optim, criterion, batch_size, 'train')
            train(Xtrain, ytrain, model, optim, criterion, batch_size, 'stats')
            progress.update(epoch+1)

        progress.finish()

        model.evaluate()
        nll, _ = test(Xtrain, ytrain, model, batch_size)
        _, nerr = test(Xval, yval, model, batch_size)

        print("Trainset NLL: {:.2f}".format(nll))
        # The errors are counted on the validation split (Xval/yval), so the
        # label says so; the test split is not evaluated here.
        print("Valset errors: {}".format(nerr))

    run(df.SGD(lr=1e-1))
    run(df.Momentum(lr=1e-2, momentum=0.95))
    run(df.Nesterov(lr=1e-2, momentum=0.90))
    run(df.AdaGrad(lr=1e-2, eps=1e-4))
    run(df.RMSProp(lr=1e-3, rho=0.90, eps=1e-5))
    run(df.AdaDelta(rho=0.99, lr=5e-1, eps=1e-4))

examples/Optimizers/test.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import numpy as np
2+
import theano as th
3+
4+
5+
def test(X, y, model, batch_size):
    """Evaluate `model` on `(X, y)` in minibatches.

    Args:
        X: Indexable collection of inputs; slices are cast to Theano's
            `floatX` before being passed to `model.forward`.
        y: Integer class labels aligned with `X`; used to index the
            predicted probabilities.
        model: Object whose `forward(batch)` returns a 2-D array of
            per-class probabilities (one row per sample).
        batch_size: Number of samples per minibatch.

    Returns:
        A `(nll, nerrors)` tuple: the negative log-likelihood of the true
        classes summed (not averaged) over all samples, and the number of
        samples whose argmax prediction differs from the label.
    """
    nll = 0
    nerrors = 0

    # Ceiling division so a trailing partial minibatch is included.
    n_batches = (len(X) + batch_size - 1) // batch_size
    for j in range(n_batches):
        # Note: numpy correctly handles the size of the last minibatch.
        batch = slice(j*batch_size, (j+1)*batch_size)
        miniX = X[batch].astype(th.config.floatX)
        miniy = y[batch]

        pred_probas = model.forward(miniX)
        preds = np.argmax(pred_probas, axis=1)

        # Probability assigned to the true class of each sample, clipped
        # away from 0 and 1 so the log stays finite.
        true_probas = pred_probas[np.arange(len(miniX)), miniy]
        nll -= np.sum(np.log(np.clip(true_probas, 1e-15, 1-1e-15)))
        nerrors += np.sum(preds != miniy)

    return nll, nerrors

examples/Optimizers/train.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import numpy as np
2+
import theano as th
3+
4+
5+
def train(X, y, model, optimiser, criterion, batch_size, mode='train'):
    """Run one shuffled pass over `(X, y)` in minibatches.

    Args:
        X: Inputs, indexable by an integer index array.
        y: Targets aligned with `X`.
        model: Model to update (`'train'`) or to feed statistics (`'stats'`).
        optimiser: Object whose `update_parameters(model)` applies a step.
        criterion: Loss passed through to `model.accumulate_gradients`.
        batch_size: Samples per minibatch. A trailing partial minibatch is
            dropped.
        mode: `'train'` to compute gradients and update parameters, or
            `'stats'` to only call `model.accumulate_statistics`.

    Raises:
        ValueError: If `mode` is neither `'train'` nor `'stats'`.
    """
    # Validate up front with a real exception: an `assert` is stripped under
    # `python -O`, and checking inside the loop would let a bad mode pass
    # silently whenever there are fewer samples than `batch_size`.
    if mode not in ('train', 'stats'):
        raise ValueError("Mode should be either 'train' or 'stats'")

    shuffle = np.random.permutation(len(X))

    for j in range(len(X) // batch_size):
        indices = shuffle[j*batch_size : (j+1)*batch_size]
        mini_batch_input = X[indices].astype(th.config.floatX)
        mini_batch_targets = y[indices].astype(th.config.floatX)

        if mode == 'train':
            model.zero_grad_parameters()
            model.accumulate_gradients(mini_batch_input, mini_batch_targets, criterion)
            optimiser.update_parameters(model)
        else:
            model.accumulate_statistics(mini_batch_input)

0 commit comments

Comments
 (0)