# sequential_mnist_example.py
"""Sequential MNIST example: train an LSTM baseline with a standard optimizer
(SGD/Adam/RMSprop) or a functionally-regularized HC optimizer (HCGD/HCAdam)."""
import argparse

import torch.nn.functional as F
from torch import optim

from model import LSTMBaseline
from utils import sequential_MNIST, train, test
from hcgd import HCAdam, HCGD
parser = argparse.ArgumentParser(description='Recurrent Unit Baselines')
parser.add_argument('--batch_size', help='batch size of network', type=int, default=64)
parser.add_argument('--epochs', help='number of epochs', type=int, default=20)
parser.add_argument('--hidden_layer_size', help='size of the hidden layer in the LSTM', type=int, default=128)
parser.add_argument('--no-gpu', help="don't use gpu for training", action='store_true')
parser.add_argument('--learning-rate', help='the learning rate', type=float, default=0.01)
parser.add_argument('--inner-learning-rate', help='the inner learning rate for HC methods', type=float, default=0.01)
parser.add_argument('--n-corrections', help='number of iterations in the inner loop for HC methods',
                    type=int, default=1)
parser.add_argument('--function-correction-lambda',
                    help='hyperparameter for HC methods, controlling the strength of functional regularization',
                    type=float, default=0.5)
parser.add_argument('--gradient_clipping_value', help='the gradient clipping value', type=float, default=1.0)
parser.add_argument('--log-interval', type=int, default=0, metavar='N',
                    help='how many batches to wait before logging training status, if train error is wanted')
parser.add_argument('--opt', type=str, default='sgd', help='which optimizer to use',
                    choices=('hcadam', 'adam', 'sgd', 'hcgd', 'rmsprop'))
args = parser.parse_args()
args.gpu = not args.no_gpu
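
# Example invocation of this script with an HC optimizer (the values shown
# are simply the defaults above):
#   python sequential_mnist_example.py --opt hcgd --learning-rate 0.01 \
#       --inner-learning-rate 0.01 --n-corrections 1 --function-correction-lambda 0.5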

if __name__ == '__main__':
    # how many pixels to read at one time; =1 is the pure sequential MNIST task
    NUM_PIXELS = 1

    training_data, testing_data = sequential_MNIST(args.batch_size, NUM_PIXELS, gpu=args.gpu)
    # a second loader over the same data serves as the validation set
    val_loader, _ = sequential_MNIST(args.batch_size, NUM_PIXELS, gpu=args.gpu)

    lr = args.learning_rate
    fcl = args.function_correction_lambda
    ilr = args.inner_learning_rate
    ncorr = args.n_corrections

    model = LSTMBaseline(NUM_PIXELS, args.batch_size, hidden_dim=args.hidden_layer_size, num_layers=1)
    if args.gpu:
        model.cuda()

    # NLL loss expects the model to output log-probabilities
    criterion = F.nll_loss

    if args.opt == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=lr,
                              momentum=0.9, weight_decay=1e-4)
    elif args.opt == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=lr)
    elif args.opt == 'rmsprop':
        optimizer = optim.RMSprop(model.parameters(), lr=lr)
    elif args.opt == 'hcgd':
        optimizer = HCGD(model.parameters(),
                         lr, momentum=0.9, weight_decay=1e-4,  # normal SGD params
                         fcn_change_limiter=fcl, inner_lr=ilr, n_corrections=ncorr,  # HC params
                         )
    elif args.opt == 'hcadam':
        optimizer = HCAdam(model.parameters(), lr,
                           fcn_change_limiter=fcl, inner_lr=ilr, n_corrections=ncorr,  # HC params
                           betas=(0.9, 0.999), eps=1e-08, amsgrad=False,  # adam params
                           weight_decay=0, clip_correction_grad=0)
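
    # `gradient_clipping_value` is presumably consumed inside utils.train; a
    # typical PyTorch pattern (a hypothetical sketch, not confirmed from utils) is:
    #   torch.nn.utils.clip_grad_norm_(model.parameters(), args.gradient_clipping_value)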

    test_accuracy = []
    train_acc = []
    for epoch in range(1, args.epochs + 1):
        tr, te = train(model, training_data, val_loader, testing_data, criterion, args, optimizer)
        if len(te) > 0:
            test_error_to_print = te[-1]
        else:
            test_loss, acc = test(model, testing_data, criterion, args, 10000)
            test_error_to_print = acc
        print('Epoch: {}, test accuracy {}'.format(epoch, test_error_to_print))
        # store all the accuracies in lists; these can be used to create figures
        test_accuracy += te
        train_acc += tr
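
    # The accuracy lists above can be turned into a learning-curve figure. A
    # minimal sketch, assuming matplotlib is available (it is not otherwise a
    # dependency of this script):
    # import matplotlib.pyplot as plt
    # plt.plot(train_acc, label='train')
    # plt.plot(test_accuracy, label='test')
    # plt.xlabel('evaluation step')
    # plt.ylabel('accuracy')
    # plt.legend()
    # plt.savefig('sequential_mnist_accuracy.png')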