
Commit 6e3ef61

add new example
1 parent f30eeb4 commit 6e3ef61

6 files changed, +439 -1 lines changed


README.md

Lines changed: 3 additions & 1 deletion
```diff
@@ -98,7 +98,9 @@ Examples
 
 This example shows how to use hyperparameter in your research projects, and make your experiments reproducible.
 
-## experiment tracing for data scientists
+## [experiment tracing for data scientists](examples/mnist/README.md)
+
+This example shows experiment management with hyperparameter, and tracing the results with mlflow.tracing.
 
 Todo.
 
```
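As a rough sketch of how the pieces referenced above could fit together (this is not code from this commit: the parameter names come from main_with_hp.py below, the metric name and values are invented, and the "name=value" string form accepted by param_scope is an assumption based on how the -D defines are forwarded to it):

```python
# Illustrative sketch only, not part of this commit.
# Assumes the hyperparameter and mlflow packages are installed; the
# "name=value" define strings and the logged values are assumptions.
import mlflow
from hyperparameter import param_scope

with param_scope("dropout1=0.3", "fc1=256"):   # overrides, as -D would supply them
    with mlflow.start_run():
        with param_scope() as hp:
            # record the hyperparameters this run actually resolved
            mlflow.log_param("dropout1", hp().dropout1(0.25))
            mlflow.log_param("fc1", hp().fc1(128))
        # ... build Net() and run train/test as in main_with_hp.py ...
        mlflow.log_metric("test_accuracy", 0.99)  # placeholder value
```

mlflow.start_run, mlflow.log_param and mlflow.log_metric are the standard tracking calls; the actual example under examples/mnist may organize the logging differently.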

examples/mnist/README.md

Lines changed: 7 additions & 0 deletions
# Basic MNIST Example

```bash
pip install -r requirements.txt
python main.py
# CUDA_VISIBLE_DEVICES=2 python main.py  # optionally pin the run to a specific GPU, e.g. GPU 2
```
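The hyperparameter-aware variant added by this commit, `main_with_hp.py`, is run the same way; it additionally accepts a `-D`/`--define` flag whose values are forwarded to `param_scope` (a usage sketch follows that file's listing below).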

examples/mnist/main.py

Lines changed: 137 additions & 0 deletions
```python
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset1 = datasets.MNIST('../data', train=True, download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('../data', train=False,
                              transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")


if __name__ == '__main__':
    main()
```
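main_with_hp.py below is the same script instrumented with hyperparameter: the dropout rates and hidden-layer width hard-coded in Net.__init__ above become hp() reads that keep these values as defaults, and main() gains a -D/--define flag whose values are applied around model construction via param_scope.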

examples/mnist/main_with_hp.py

Lines changed: 142 additions & 0 deletions
```python
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

from hyperparameter import param_scope


class Net(nn.Module):
    def __init__(self):
        with param_scope() as hp:  # read hyperparameters from the enclosing scope
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 32, 3, 1)
            self.conv2 = nn.Conv2d(32, 64, 3, 1)
            # hp().<name>(<default>) returns the scoped value, or the default
            self.dropout1 = nn.Dropout(hp().dropout1(0.25))
            self.dropout2 = nn.Dropout(hp().dropout2(0.5))
            self.fc1 = nn.Linear(9216, hp().fc1(128))
            self.fc2 = nn.Linear(hp().fc1(128), 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
            if args.dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=14, metavar='N',
                        help='number of epochs to train (default: 14)')
    parser.add_argument('--lr', type=float, default=1.0, metavar='LR',
                        help='learning rate (default: 1.0)')
    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
                        help='Learning rate step gamma (default: 0.7)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--dry-run', action='store_true', default=False,
                        help='quickly check a single pass')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    # hyperparameter overrides, forwarded to param_scope below
    parser.add_argument('-D', '--define', nargs='*', default=[])
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()

    torch.manual_seed(args.seed)

    device = torch.device("cuda" if use_cuda else "cpu")

    train_kwargs = {'batch_size': args.batch_size}
    test_kwargs = {'batch_size': args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {'num_workers': 1,
                       'pin_memory': True,
                       'shuffle': True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    dataset1 = datasets.MNIST('../data', train=True, download=True,
                              transform=transform)
    dataset2 = datasets.MNIST('../data', train=False,
                              transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    with param_scope(*args.define):  # apply -D overrides to the model built here
        model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")


if __name__ == '__main__':
    main()
```
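For reference, a minimal sketch of how the -D/--define flag interacts with param_scope. This is not part of the commit; the "name=value" string form is an assumption inferred from args.define being passed to param_scope unchanged, and the override values are arbitrary examples.

```python
# Minimal sketch, not part of this commit. Overrides the defaults that
# Net.__init__ reads via hp().dropout1(...) and hp().fc1(...).
# Assumes "name=value" define strings; the values are arbitrary examples.
from hyperparameter import param_scope

from main_with_hp import Net  # the model defined in the listing above

with param_scope("dropout1=0.3", "fc1=256"):
    model = Net()  # dropout1 and fc1 now resolve to the overridden values

# Roughly the same effect from the command line:
#   python main_with_hp.py -D dropout1=0.3 fc1=256
```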
