Skip to content

Commit 02c8d47

Browse files
committed
Merge pull request #19 from lucasb-eyer/examples-indexing
Make indexing of minibatches simpler thanks to numpy.
2 parents 8ef7b8f + 80d3a5d commit 02c8d47

File tree

4 files changed

+22
-42
lines changed

4 files changed

+22
-42
lines changed

examples/Kaggle-Otto/test.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
import theano as _th
2+
import theano as th
33

44
from kaggle_utils import multiclass_log_loss
55
from examples.utils import make_progressbar
@@ -8,24 +8,17 @@ def validate(dataset_x, dataset_y, model, epoch, batch_size):
88
progress = make_progressbar('Testing epoch #{}'.format(epoch), len(dataset_x))
99
progress.start()
1010

11-
mini_batch_input = np.empty(shape=(batch_size, 93), dtype=_th.config.floatX)
12-
mini_batch_targets = np.empty(shape=(batch_size, ), dtype=_th.config.floatX)
1311
logloss = 0.
14-
1512
for j in range((dataset_x.shape[0] + batch_size - 1) // batch_size):
16-
progress.update(j * batch_size)
17-
for k in range(batch_size):
18-
if j * batch_size + k < dataset_x.shape[0]:
19-
mini_batch_input[k] = dataset_x[j * batch_size + k]
20-
mini_batch_targets[k] = dataset_y[j * batch_size + k]
13+
# Note: numpy correctly handles the size of the last minibatch.
14+
mini_batch_input = dataset_x[j*batch_size : (j+1)*batch_size].astype(th.config.floatX)
15+
mini_batch_targets = dataset_y[j*batch_size : (j+1)*batch_size].astype(th.config.floatX)
2116

2217
mini_batch_prediction = model.forward(mini_batch_input)
2318

24-
if (j + 1) * batch_size > dataset_x.shape[0]:
25-
mini_batch_prediction.resize((dataset_x.shape[0] - j * batch_size, 9))
26-
mini_batch_targets.resize((dataset_x.shape[0] - j * batch_size, ))
27-
2819
logloss += multiclass_log_loss(mini_batch_targets, mini_batch_prediction, normalize=False)
2920

21+
progress.update(j * batch_size + len(mini_batch_input))
22+
3023
progress.finish()
3124
print("Epoch #{}, Logloss: {:.5f}".format(epoch, logloss/dataset_x.shape[0]))

examples/Kaggle-Otto/train.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
import theano as _th
2+
import theano as th
33

44
from examples.utils import make_progressbar
55

@@ -10,13 +10,10 @@ def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size,
1010

1111
shuffle = np.random.permutation(len(dataset_x))
1212

13-
mini_batch_input = np.empty(shape=(batch_size, 93), dtype=_th.config.floatX)
14-
mini_batch_targets = np.empty(shape=(batch_size, ), dtype=_th.config.floatX)
15-
1613
for j in range(dataset_x.shape[0] // batch_size):
17-
for k in range(batch_size):
18-
mini_batch_input[k] = dataset_x[shuffle[j * batch_size + k]]
19-
mini_batch_targets[k] = dataset_y[shuffle[j * batch_size + k]]
14+
indices = shuffle[j*batch_size : (j+1)*batch_size]
15+
mini_batch_input = dataset_x[indices].astype(th.config.floatX)
16+
mini_batch_targets = dataset_y[indices].astype(th.config.floatX)
2017

2118
if mode == 'train':
2219
model.zero_grad_parameters()
@@ -27,6 +24,6 @@ def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size,
2724
else:
2825
assert False, "Mode should be either 'train' or 'stats'"
2926

30-
progress.update((j+1) * batch_size)
27+
progress.update(j*batch_size + len(mini_batch_input))
3128

3229
progress.finish()

examples/MNIST/test.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,24 @@
11
import numpy as np
2-
import theano as _th
2+
import theano as th
33

44
from examples.utils import make_progressbar
55

66
def validate(dataset_x, dataset_y, model, epoch, batch_size):
77
progress = make_progressbar('Testing epoch #{}'.format(epoch), len(dataset_x))
88
progress.start()
99

10-
mini_batch_input = np.empty(shape=(batch_size, 28*28), dtype=_th.config.floatX)
11-
mini_batch_targets = np.empty(shape=(batch_size, ), dtype=_th.config.floatX)
1210
nerrors = 0
13-
1411
for j in range((dataset_x.shape[0] + batch_size - 1) // batch_size):
15-
progress.update(j * batch_size)
16-
for k in range(batch_size):
17-
if j * batch_size + k < dataset_x.shape[0]:
18-
mini_batch_input[k] = dataset_x[j * batch_size + k]
19-
mini_batch_targets[k] = dataset_y[j * batch_size + k]
12+
# Note: numpy correctly handles the size of the last minibatch.
13+
mini_batch_input = dataset_x[j*batch_size : (j+1)*batch_size].astype(th.config.floatX)
14+
mini_batch_targets = dataset_y[j*batch_size : (j+1)*batch_size].astype(th.config.floatX)
2015

2116
mini_batch_prediction = np.argmax(model.forward(mini_batch_input), axis=1)
2217

23-
if (j + 1) * batch_size > dataset_x.shape[0]:
24-
mini_batch_prediction.resize((dataset_x.shape[0] - j * batch_size, ))
25-
mini_batch_targets.resize((dataset_x.shape[0] - j * batch_size, ))
26-
2718
nerrors += sum(mini_batch_targets != mini_batch_prediction)
2819

20+
progress.update(j * batch_size)
21+
2922
progress.finish()
3023
accuracy = 1 - float(nerrors)/dataset_x.shape[0]
3124
print("Epoch #{}, Classification accuracy: {:.2%} ({} errors)".format(epoch, accuracy, nerrors))

examples/MNIST/train.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
import theano as _th
2+
import theano as th
33

44
from examples.utils import make_progressbar
55

@@ -10,13 +10,10 @@ def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size,
1010

1111
shuffle = np.random.permutation(len(dataset_x))
1212

13-
mini_batch_input = np.empty(shape=(batch_size, 28*28), dtype=_th.config.floatX)
14-
mini_batch_targets = np.empty(shape=(batch_size, ), dtype=_th.config.floatX)
15-
1613
for j in range(dataset_x.shape[0] // batch_size):
17-
for k in range(batch_size):
18-
mini_batch_input[k] = dataset_x[shuffle[j * batch_size + k]]
19-
mini_batch_targets[k] = dataset_y[shuffle[j * batch_size + k]]
14+
indices = shuffle[j*batch_size : (j+1)*batch_size]
15+
mini_batch_input = dataset_x[indices].astype(th.config.floatX)
16+
mini_batch_targets = dataset_y[indices].astype(th.config.floatX)
2017

2118
if mode == 'train':
2219
model.zero_grad_parameters()
@@ -27,6 +24,6 @@ def train(dataset_x, dataset_y, model, optimiser, criterion, epoch, batch_size,
2724
else:
2825
assert False, "Mode should be either 'train' or 'stats'"
2926

30-
progress.update((j+1) * batch_size)
27+
progress.update(j*batch_size + len(mini_batch_input))
3128

3229
progress.finish()

0 commit comments

Comments (0)