Skip to content

Commit bfeba2c

Browse files
authored
Add files via upload
1 parent 3c250e2 commit bfeba2c

File tree

8 files changed

+4276
-0
lines changed

8 files changed

+4276
-0
lines changed

LeNet5/Compare_all_methods.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import matplotlib.pyplot as plt
2+
3+
# Optimizer scripts to compare. The ones visible alongside this file each
# train a LeNet5 variant and draw their curves on the current figure.
_SCRIPTS = (
    'LeNet5_momentum.py',
    'LeNet5_adam.py',
    'LeNet5_kfac.py',
    'LeNet5_fisher_kron.py',
    'LeNet5_newton_kron.py',
)

# Repeat the whole comparison 10 times so each method contributes 10 curves.
for _ in range(10):
    for _script in _SCRIPTS:
        # Read through a context manager so the file handle is closed right
        # away instead of leaking until garbage collection.
        with open(_script) as _fh:
            _code = _fh.read()
        # Trusted local script, executed in this module's namespace exactly
        # as the original bare exec(open(...).read()) did.
        exec(_code)

# NOTE(review): the original indentation was lost; the legend and show calls
# are assumed to run once, after all repetitions -- confirm.
plt.subplot(2,1,1)
plt.legend(['Momentum', 'Adam', 'KFAC', 'Fisher type preconditioner', 'Newton type preconditioner'], loc='best')

plt.show()

LeNet5/LeNet5_adam.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
import matplotlib.pyplot as plt
2+
import torch
3+
from torch.autograd import grad
4+
import torch.nn.functional as F
5+
from torchvision import datasets, transforms
6+
7+
# MNIST images are only converted to tensors; no other preprocessing.
mnist_transform = transforms.Compose([transforms.ToTensor()])

# Training split (downloaded into ../data when missing), shuffled batches of 64.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split, served in shuffled batches of 1000.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)
16+
17+
"""input image size for the original LeNet5 is 32x32, here is 28x28"""
18+
# One matrix per layer: rows [:-1] are used as the layer weights and the last
# row as its bias (see LeNet5). Entries are N(0, 1) samples scaled by 0.1.
# requires_grad_() marks the fresh tensor as a trainable leaf directly,
# avoiding the copy (and UserWarning) of torch.tensor(<tensor>, ...).
W1 = (0.1*torch.randn(1*5*5+1, 6)).requires_grad_(True)
W2 = (0.1*torch.randn(6*5*5+1, 16)).requires_grad_(True)
W3 = (0.1*torch.randn(16*4*4+1, 120)).requires_grad_(True)  # here is 4x4, not 5x5
W4 = (0.1*torch.randn(120+1, 84)).requires_grad_(True)
W5 = (0.1*torch.randn(84+1, 10)).requires_grad_(True)
Ws = [W1, W2, W3, W4, W5]
24+
25+
def LeNet5(x):
    """Map a batch of images to class scores using the global weights W1..W5.

    Each W stacks a layer's weights (every row but the last) on top of its
    bias (the last row). The returned scores are unnormalized.
    """
    conv1_kernel, conv1_bias = W1[:-1].view(6,1,5,5), W1[-1]
    conv2_kernel, conv2_bias = W2[:-1].view(16,6,5,5), W2[-1]

    # Two stages of convolution -> 2x2 max pooling -> ReLU.
    h = F.conv2d(x, conv1_kernel, bias=conv1_bias)
    h = F.relu(F.max_pool2d(h, 2))
    h = F.conv2d(h, conv2_kernel, bias=conv2_bias)
    h = F.relu(F.max_pool2d(h, 2))

    # Flatten to 16*4*4 features per sample, then three affine layers.
    h = h.view(-1, 16*4*4)
    h = F.relu(h.mm(W3[:-1]) + W3[-1])
    h = F.relu(h.mm(W4[:-1]) + W4[-1])
    return h.mm(W5[:-1]) + W5[-1]
34+
35+
def train_loss(data, target):
    """Return the cross entropy loss of LeNet5 on one minibatch.

    The scores from LeNet5 are turned into log-probabilities and scored
    against `target` with F.nll_loss (default reduction).
    """
    log_probs = F.log_softmax(LeNet5(data), dim=1)
    return F.nll_loss(log_probs, target)
40+
41+
def test_loss():
    """Return the classification error rate of LeNet5 over test_loader.

    Despite the name this is not a loss: it is the number of wrongly
    predicted samples divided by len(test_loader.dataset).
    """
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Predicted class = index of the largest score in each row.
            _, pred = torch.max(LeNet5(data), dim=1)
            # Accumulate a plain int so the counter has a single type, even
            # when the loader yields no batch at all.
            num_errs += torch.sum(pred != target).item()
    return num_errs/len(test_loader.dataset)
49+
50+
# Adam state: running averages of the gradient (m0) and its square (v0).
m0 = [torch.zeros(W.shape) for W in Ws]
v0 = [torch.zeros(W.shape) for W in Ws]
step_size = 0.01
cnt = 0  # number of parameter updates performed so far
num_epochs = 10
# Per-epoch factor that shrinks the step size by 100x in total between the
# first and the last epoch (0.01**(1/9) for the default 10 epochs).
lr_decay = 0.01**(1/max(num_epochs - 1, 1))
TrainLoss, TestLoss = [], []
for epoch in range(num_epochs):
    trainloss = 0.0
    for data, target in train_loader:
        loss = train_loss(data, target)

        grads = grad(loss, Ws)
        trainloss += loss.item()

        with torch.no_grad():
            # cnt/(cnt+1) turns the first updates into plain running means;
            # afterwards the forgetting factor saturates at its cap.
            lmbd = min(cnt/(cnt+1), 0.9)
            m0 = [lmbd*old + (1.0-lmbd)*new for (old, new) in zip(m0, grads)]
            lmbd = min(cnt/(cnt+1), 0.999)
            v0 = [lmbd*old + (1.0-lmbd)*new*new for (old, new) in zip(v0, grads)]
            # In-place update of the leaf tensors (allowed under no_grad).
            for Wi, mi, vi in zip(Ws, m0, v0):
                Wi -= step_size*mi/torch.sqrt(vi + 1e-8)

        cnt = cnt + 1

    # NOTE(review): trainloss sums per-batch losses but is divided by the
    # number of samples, not batches; kept unchanged so the curves stay
    # comparable with the other scripts -- confirm intent.
    TrainLoss.append(trainloss/len(train_loader.dataset))
    TestLoss.append(test_loss())
    step_size = lr_decay*step_size
    print('Epoch: {}; train loss: {}; best test loss: {}'.format(epoch, TrainLoss[-1], min(TestLoss)))

# Cyan curves: training loss on top, test error rate below.
epochs = range(1, num_epochs + 1)
plt.subplot(2,1,1)
plt.semilogy(epochs, TrainLoss, '-c', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Train cross entropy loss')
plt.subplot(2,1,2)
plt.semilogy(epochs, TestLoss, '-c', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Test classification error rate')

LeNet5/LeNet5_fisher_kron.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import matplotlib.pyplot as plt
2+
import torch
3+
from torch.autograd import grad
4+
import torch.nn.functional as F
5+
from torchvision import datasets, transforms
6+
import preconditioned_stochastic_gradient_descent as psgd#requires PSGD file
7+
8+
# MNIST images are only converted to tensors; no other preprocessing.
mnist_transform = transforms.Compose([transforms.ToTensor()])

# Training split (downloaded into ../data when missing), shuffled batches of 64.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split, served in shuffled batches of 1000.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)
17+
18+
"""input image size for the original LeNet5 is 32x32, here is 28x28"""
19+
# One matrix per layer: rows [:-1] are used as the layer weights and the last
# row as its bias (see LeNet5). Entries are N(0, 1) samples scaled by 0.1.
# requires_grad_() marks the fresh tensor as a trainable leaf directly,
# avoiding the copy (and UserWarning) of torch.tensor(<tensor>, ...).
W1 = (0.1*torch.randn(1*5*5+1, 6)).requires_grad_(True)
W2 = (0.1*torch.randn(6*5*5+1, 16)).requires_grad_(True)
W3 = (0.1*torch.randn(16*4*4+1, 120)).requires_grad_(True)  # here is 4x4, not 5x5
W4 = (0.1*torch.randn(120+1, 84)).requires_grad_(True)
W5 = (0.1*torch.randn(84+1, 10)).requires_grad_(True)
Ws = [W1, W2, W3, W4, W5]
25+
26+
def LeNet5(x):
    """Map a batch of images to class scores using the global weights W1..W5.

    Each W stacks a layer's weights (every row but the last) on top of its
    bias (the last row). The returned scores are unnormalized.
    """
    conv1_kernel, conv1_bias = W1[:-1].view(6,1,5,5), W1[-1]
    conv2_kernel, conv2_bias = W2[:-1].view(16,6,5,5), W2[-1]

    # Two stages of convolution -> 2x2 max pooling -> ReLU.
    h = F.conv2d(x, conv1_kernel, bias=conv1_bias)
    h = F.relu(F.max_pool2d(h, 2))
    h = F.conv2d(h, conv2_kernel, bias=conv2_bias)
    h = F.relu(F.max_pool2d(h, 2))

    # Flatten to 16*4*4 features per sample, then three affine layers.
    h = h.view(-1, 16*4*4)
    h = F.relu(h.mm(W3[:-1]) + W3[-1])
    h = F.relu(h.mm(W4[:-1]) + W4[-1])
    return h.mm(W5[:-1]) + W5[-1]
35+
36+
def train_loss(data, target):
    """Return the cross entropy loss of LeNet5 on one minibatch.

    The scores from LeNet5 are turned into log-probabilities and scored
    against `target` with F.nll_loss (default reduction).
    """
    log_probs = F.log_softmax(LeNet5(data), dim=1)
    return F.nll_loss(log_probs, target)
41+
42+
def test_loss():
    """Return the classification error rate of LeNet5 over test_loader.

    Despite the name this is not a loss: it is the number of wrongly
    predicted samples divided by len(test_loader.dataset).
    """
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Predicted class = index of the largest score in each row.
            _, pred = torch.max(LeNet5(data), dim=1)
            # Accumulate a plain int so the counter has a single type, even
            # when the loader yields no batch at all.
            num_errs += torch.sum(pred != target).item()
    return num_errs/len(test_loader.dataset)
50+
51+
# One pair of Kronecker preconditioner factors per weight matrix, both
# starting as identities sized to the matrix's rows and columns.
Qs = [[torch.eye(W.shape[0]), torch.eye(W.shape[1])] for W in Ws]
step_size = 0.002
damping = 0.0005
grad_norm_clip_thr = 1e10
num_epochs = 10
# Per-epoch factor that shrinks the step size by 100x in total between the
# first and the last epoch (0.01**(1/9) for the default 10 epochs).
lr_decay = 0.01**(1/max(num_epochs - 1, 1))
TrainLoss, TestLoss = [], []
for epoch in range(num_epochs):
    trainloss = 0.0
    for data, target in train_loader:
        loss = train_loss(data, target)

        grads = grad(loss, Ws)
        trainloss += loss.item()

        # Random probe per weight matrix. The gradient itself is passed where
        # a Hessian-vector product would go, so no second backward pass is
        # needed (the original kept grad(grads, Ws, v) as a commented-out
        # alternative).
        v = [torch.randn(W.shape) for W in Ws]
        Hv = grads
        with torch.no_grad():
            # NOTE(review): psgd helpers are external; presumably the first
            # refines the factors and the second applies them -- confirm.
            Qs = [psgd.update_precond_kron(q[0], q[1], dw, dg + damping*dw) for (q, dw, dg) in zip(Qs, v, Hv)]
            pre_grads = [psgd.precond_grad_kron(q[0], q[1], g) for (q, g) in zip(Qs, grads)]
            # Scale the step down when the preconditioned gradient norm
            # exceeds the clipping threshold.
            grad_norm = torch.sqrt(sum(torch.sum(g*g) for g in pre_grads))
            step_adjust = min(grad_norm_clip_thr/(grad_norm + 1.2e-38), 1.0)
            # In-place update of the leaf tensors (allowed under no_grad).
            for Wi, gi in zip(Ws, pre_grads):
                Wi -= step_adjust*step_size*gi

    # NOTE(review): trainloss sums per-batch losses but is divided by the
    # number of samples, not batches; kept unchanged so the curves stay
    # comparable with the other scripts -- confirm intent.
    TrainLoss.append(trainloss/len(train_loader.dataset))
    TestLoss.append(test_loss())
    step_size = lr_decay*step_size
    print('Epoch: {}; train loss: {}; best test loss: {}'.format(epoch, TrainLoss[-1], min(TestLoss)))

# Red curves: training loss on top, test error rate below.
epochs = range(1, num_epochs + 1)
plt.subplot(2,1,1)
plt.semilogy(epochs, TrainLoss, '-r', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Train cross entropy loss')
plt.subplot(2,1,2)
plt.semilogy(epochs, TestLoss, '-r', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Test classification error rate')

LeNet5/LeNet5_kfac.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import matplotlib.pyplot as plt
2+
import torch
3+
import torch.nn as nn
4+
import torch.nn.functional as F
5+
import torch.optim as optim
6+
from torchvision import datasets, transforms
7+
from kfac import KFAC#requires KFAC file
8+
9+
# MNIST images are only converted to tensors; no other preprocessing.
mnist_transform = transforms.Compose([transforms.ToTensor()])

# Training split (downloaded into ../data when missing), shuffled batches of 64.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split, served in shuffled batches of 1000.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)
18+
19+
class LeNet5(nn.Module):
    """LeNet5-style CNN: two 5x5 conv layers, then three linear layers.

    forward() returns log-probabilities over 10 classes. The first linear
    layer expects 256 (= 16*4*4) features per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, kernel_size=5)
        self.conv2 = nn.Conv2d(6, 16, kernel_size=5)
        # Classifier head.
        self.fc1 = nn.Linear(256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the batch `x`."""
        # Two stages of convolution -> 2x2 max pooling -> ReLU.
        features = F.relu(F.max_pool2d(self.conv1(x), 2))
        features = F.relu(F.max_pool2d(self.conv2(features), 2))
        # Flatten to 256 features per sample before the linear layers.
        hidden = F.relu(self.fc1(features.view(-1, 256)))
        hidden = F.relu(self.fc2(hidden))
        return F.log_softmax(self.fc3(hidden), dim=1)
36+
37+
def test_loss(model, test_loader):
    """Return the classification error rate of `model` over `test_loader`.

    Despite the name this is not a loss: it is the number of wrongly
    predicted samples divided by len(test_loader.dataset). The model is
    switched to eval mode and left there; callers re-enable train mode.
    """
    model.eval()
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Predicted class = index of the largest output in each row.
            _, pred = torch.max(model(data), dim=1)
            # Accumulate a plain int so the counter has a single type, even
            # when the loader yields no batch at all.
            num_errs += torch.sum(pred != target).item()
    return num_errs/len(test_loader.dataset)


# Not a pytest test despite the ``test_`` prefix; opt out of collection.
test_loss.__test__ = False
46+
47+
model = LeNet5()
# KFAC is stepped after backward() and before the SGD step, so SGD applies
# whatever gradients KFAC leaves on the parameters.
preconditioner = KFAC(model, 0.001, alpha=0.05)
lr0 = 0.01
optimizer = optim.SGD(model.parameters(), lr=lr0)
num_epochs = 10
# Per-epoch factor that shrinks the learning rate by 100x in total between
# the first and the last epoch (0.01**(1/9) for the default 10 epochs).
lr_decay = 0.01**(1/max(num_epochs - 1, 1))
TrainLoss, TestLoss = [], []
for epoch in range(num_epochs):
    # test_loss() leaves the model in eval mode, so re-enable train mode.
    model.train()
    trainloss = 0.0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)

        # The model already returns log-probabilities.
        loss = F.nll_loss(output, target)

        trainloss += loss.item()
        loss.backward()
        preconditioner.step()
        optimizer.step()

    lr0 = lr_decay*lr0
    optimizer.param_groups[0]['lr'] = lr0
    # NOTE(review): trainloss sums per-batch losses but is divided by the
    # number of samples, not batches; kept unchanged so the curves stay
    # comparable with the other scripts -- confirm intent.
    TrainLoss.append(trainloss/len(train_loader.dataset))
    TestLoss.append(test_loss(model, test_loader))
    print('Epoch: {}; train loss: {}; best test loss: {}'.format(epoch, TrainLoss[-1], min(TestLoss)))

# Blue curves: training loss on top, test error rate below.
epochs = range(1, num_epochs + 1)
plt.subplot(2,1,1)
plt.semilogy(epochs, TrainLoss, '-b', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Train cross entropy loss')
plt.subplot(2,1,2)
plt.semilogy(epochs, TestLoss, '-b', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Test classification error rate')

LeNet5/LeNet5_momentum.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import matplotlib.pyplot as plt
2+
import torch
3+
from torch.autograd import grad
4+
import torch.nn.functional as F
5+
from torchvision import datasets, transforms
6+
7+
# MNIST images are only converted to tensors; no other preprocessing.
mnist_transform = transforms.Compose([transforms.ToTensor()])

# Training split (downloaded into ../data when missing), shuffled batches of 64.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(train_set, batch_size=64, shuffle=True)

# Test split, served in shuffled batches of 1000.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000, shuffle=True)
16+
17+
"""input image size for the original LeNet5 is 32x32, here is 28x28"""
18+
# One matrix per layer: rows [:-1] are used as the layer weights and the last
# row as its bias (see LeNet5). Entries are N(0, 1) samples scaled by 0.1.
# requires_grad_() marks the fresh tensor as a trainable leaf directly,
# avoiding the copy (and UserWarning) of torch.tensor(<tensor>, ...).
W1 = (0.1*torch.randn(1*5*5+1, 6)).requires_grad_(True)
W2 = (0.1*torch.randn(6*5*5+1, 16)).requires_grad_(True)
W3 = (0.1*torch.randn(16*4*4+1, 120)).requires_grad_(True)  # here is 4x4, not 5x5
W4 = (0.1*torch.randn(120+1, 84)).requires_grad_(True)
W5 = (0.1*torch.randn(84+1, 10)).requires_grad_(True)
Ws = [W1, W2, W3, W4, W5]
24+
25+
def LeNet5(x):
    """Map a batch of images to class scores using the global weights W1..W5.

    Each W stacks a layer's weights (every row but the last) on top of its
    bias (the last row). The returned scores are unnormalized.
    """
    conv1_kernel, conv1_bias = W1[:-1].view(6,1,5,5), W1[-1]
    conv2_kernel, conv2_bias = W2[:-1].view(16,6,5,5), W2[-1]

    # Two stages of convolution -> 2x2 max pooling -> ReLU.
    h = F.conv2d(x, conv1_kernel, bias=conv1_bias)
    h = F.relu(F.max_pool2d(h, 2))
    h = F.conv2d(h, conv2_kernel, bias=conv2_bias)
    h = F.relu(F.max_pool2d(h, 2))

    # Flatten to 16*4*4 features per sample, then three affine layers.
    h = h.view(-1, 16*4*4)
    h = F.relu(h.mm(W3[:-1]) + W3[-1])
    h = F.relu(h.mm(W4[:-1]) + W4[-1])
    return h.mm(W5[:-1]) + W5[-1]
34+
35+
def train_loss(data, target):
    """Return the cross entropy loss of LeNet5 on one minibatch.

    The scores from LeNet5 are turned into log-probabilities and scored
    against `target` with F.nll_loss (default reduction).
    """
    log_probs = F.log_softmax(LeNet5(data), dim=1)
    return F.nll_loss(log_probs, target)
40+
41+
def test_loss():
    """Return the classification error rate of LeNet5 over test_loader.

    Despite the name this is not a loss: it is the number of wrongly
    predicted samples divided by len(test_loader.dataset).
    """
    num_errs = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Predicted class = index of the largest score in each row.
            _, pred = torch.max(LeNet5(data), dim=1)
            # Accumulate a plain int so the counter has a single type, even
            # when the loader yields no batch at all.
            num_errs += torch.sum(pred != target).item()
    return num_errs/len(test_loader.dataset)
49+
50+
# Momentum state: running average of the gradient, one buffer per weight.
m0 = [torch.zeros(W.shape) for W in Ws]
step_size = 0.5
cnt = 0  # number of parameter updates performed so far
num_epochs = 10
# Per-epoch factor that shrinks the step size by 100x in total between the
# first and the last epoch (0.01**(1/9) for the default 10 epochs).
lr_decay = 0.01**(1/max(num_epochs - 1, 1))
TrainLoss, TestLoss = [], []
for epoch in range(num_epochs):
    trainloss = 0.0
    for data, target in train_loader:
        loss = train_loss(data, target)

        grads = grad(loss, Ws)
        trainloss += loss.item()

        with torch.no_grad():
            # cnt/(cnt+1) turns the first updates into a plain running mean;
            # afterwards the forgetting factor saturates at 0.9.
            lmbd = min(cnt/(cnt+1), 0.9)
            m0 = [lmbd*old + (1.0-lmbd)*new for (old, new) in zip(m0, grads)]
            # In-place update of the leaf tensors (allowed under no_grad).
            for Wi, mi in zip(Ws, m0):
                Wi -= step_size*mi

        cnt = cnt + 1

    # NOTE(review): trainloss sums per-batch losses but is divided by the
    # number of samples, not batches; kept unchanged so the curves stay
    # comparable with the other scripts -- confirm intent.
    TrainLoss.append(trainloss/len(train_loader.dataset))
    TestLoss.append(test_loss())
    step_size = lr_decay*step_size
    print('Epoch: {}; train loss: {}; best test loss: {}'.format(epoch, TrainLoss[-1], min(TestLoss)))

# Magenta curves: training loss on top, test error rate below.
epochs = range(1, num_epochs + 1)
plt.subplot(2,1,1)
plt.semilogy(epochs, TrainLoss, '-m', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Train cross entropy loss')
plt.subplot(2,1,2)
plt.semilogy(epochs, TestLoss, '-m', linewidth=0.2)
plt.xlabel('Epochs')
plt.ylabel('Test classification error rate')

0 commit comments

Comments
 (0)