Commit 6149d78

Add prototype p1b1 pytorch benchmark
1 parent ba92047 commit 6149d78

File tree

1 file changed: +267 −0 lines changed

Lines changed: 267 additions & 0 deletions
@@ -0,0 +1,267 @@
from __future__ import print_function

import numpy as np

import h5py
import torch
import torch.nn as nn
# import torch.nn.functional as F

import torch.utils.data as data
from torch.autograd import Variable


from scipy.stats.stats import pearsonr
from sklearn.manifold import TSNE

import warnings
with warnings.catch_warnings():
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    from sklearn.metrics import r2_score
    from sklearn.metrics import accuracy_score

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt

import p1b1
# import candle_pytorch as candle
import candle

np.set_printoptions(precision=4)


def initialize_parameters():

    # Build benchmark object
    p1b1Bmk = p1b1.BenchmarkP1B1(p1b1.file_path, 'p1b1_default_model.txt', 'pytorch',
                                 prog='p1b1_baseline', desc='Train Autoencoder - Pilot 1 Benchmark 1')

    # Initialize parameters
    gParameters = candle.finalize_parameters(p1b1Bmk)
    # p1b1.logger.info('Params: {}'.format(gParameters))

    return gParameters


def save_cache(cache_file, x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels):
    with h5py.File(cache_file, 'w') as hf:
        hf.create_dataset("x_train", data=x_train)
        hf.create_dataset("y_train", data=y_train)
        hf.create_dataset("x_val", data=x_val)
        hf.create_dataset("y_val", data=y_val)
        hf.create_dataset("x_test", data=x_test)
        hf.create_dataset("y_test", data=y_test)
        hf.create_dataset("x_labels", (len(x_labels), 1), 'S100', data=[x.encode("ascii", "ignore") for x in x_labels])
        hf.create_dataset("y_labels", (len(y_labels), 1), 'S100', data=[x.encode("ascii", "ignore") for x in y_labels])


def load_cache(cache_file):
    with h5py.File(cache_file, 'r') as hf:
        x_train = hf['x_train'][:]
        y_train = hf['y_train'][:]
        x_val = hf['x_val'][:]
        y_val = hf['y_val'][:]
        x_test = hf['x_test'][:]
        y_test = hf['y_test'][:]
        x_labels = [x[0].decode('unicode_escape') for x in hf['x_labels'][:]]
        y_labels = [x[0].decode('unicode_escape') for x in hf['y_labels'][:]]
    return x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels


class p1b1Model(nn.Module):
    def __init__(self, params, input_dim, cond_dim, seed):
        super(p1b1Model, self).__init__()

        self.keras_defaults = candle.keras_default_config()
        self.seed = seed
        self.winit_func = params['initialization']

        activation = candle.build_pytorch_activation(params['activation'])
        dropout = params['dropout']
        dense_layers = params['dense']
        # dropout_layer = keras.layers.noise.AlphaDropout if params['alpha_dropout'] else Dropout
        latent_dim = params['latent_dim']

        if dense_layers is not None:
            if type(dense_layers) != list:
                dense_layers = list(dense_layers)

        # Define model
        # Add layers
        self.ly = nn.Sequential()
        # Encoder part
        lprev = input_dim
        for i, l in enumerate(dense_layers):
            self.ly.add_module('en_dense%d' % i, nn.Linear(lprev, l))
            self.ly.add_module('en_act%d' % i, activation)
            if params['batch_normalization']:
                self.ly.add_module('en_bn%d' % i, nn.BatchNorm1d(l))
            if dropout > 0:
                self.ly.add_module('en_dropout%d' % i, nn.Dropout(dropout))
            lprev = l

        if params['model'] == 'ae':
            self.ly.add_module('en_dense_latent', nn.Linear(lprev, latent_dim))
            self.ly.add_module('en_act_latent', activation)
            lprev = latent_dim

        # Decoder part
        output_dim = input_dim
        for i, l in reversed(list(enumerate(dense_layers))):
            self.ly.add_module('de_dense%d' % i, nn.Linear(lprev, l))
            self.ly.add_module('de_act%d' % i, activation)
            if params['batch_normalization']:
                self.ly.add_module('de_bn%d' % i, nn.BatchNorm1d(l))
            if dropout > 0:
                self.ly.add_module('de_dropout_%d' % i, nn.Dropout(dropout))
            lprev = l

        self.ly.add_module('out_dense', nn.Linear(lprev, output_dim))
        self.ly.add_module('out_act', activation)
        self.reset_parameters()

    def reset_parameters(self):
        """ Resets parameters of all the layers. """
        for ly in self.ly:
            if isinstance(ly, nn.Linear):
                candle.pytorch_initialize(ly.weight, self.winit_func, self.keras_defaults, self.seed)
                candle.pytorch_initialize(ly.bias, 'constant', self.keras_defaults, 0.0)

    def forward(self, x):
        return self.ly(x)


def fit(model, X_train, X_val, params):
    # Training set
    train_data = torch.from_numpy(X_train)
    train_tensor = data.TensorDataset(train_data, train_data)
    train_iter = data.DataLoader(train_tensor, batch_size=params['batch_size'], shuffle=params['shuffle'])

    # Validation set
    val_data = torch.from_numpy(X_val)
    val_tensor = torch.utils.data.TensorDataset(val_data, val_data)
    val_iter = torch.utils.data.DataLoader(val_tensor, batch_size=params['batch_size'], shuffle=params['shuffle'])

    # Configure GPUs
    # use_gpu = torch.cuda.is_available()
    device_ids = []
    ndevices = torch.cuda.device_count()
    if ndevices > 1:
        for i in range(ndevices):
            device_i = torch.device('cuda:' + str(i))
            device_ids.append(device_i)
        device = device_ids[0]
    elif ndevices == 1:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    # Instantiate with parallel processing
    if ndevices > 1:
        model = nn.DataParallel(model, device_ids, device)
    model.to(device)

    # nn.DataParallel does not forward custom attributes, so read keras_defaults
    # from the wrapped module when the model has been parallelized
    keras_defaults = getattr(model, 'module', model).keras_defaults

    # Fall back to a default learning rate when none is provided
    learning_rate = params['learning_rate'] if params['learning_rate'] is not None else 1e-2
    optimizer = candle.build_pytorch_optimizer(model, params['optimizer'], learning_rate, keras_defaults)

    loss_fn = candle.get_pytorch_function(params['loss'])

    # Train the model
    freq_log = 1

    total_step = len(train_iter)
    loss_list = []
    acc_list = []
    for epoch in range(params['epochs']):
        train_loss = 0
        for batch, (in_train, _) in enumerate(train_iter):
            # in_train = Variable(in_train)
            # if use_gpu:
            #     in_train = in_train.cuda()
            if ndevices > 0:
                in_train = in_train.to(device)

            # Run the forward pass
            output = model(in_train)
            loss = loss_fn(output, in_train)
            loss_list.append(loss.item())

            # Backprop and perform optimisation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()  # loss.data[0]

            # Logging
            if batch % freq_log == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch * len(in_train), len(train_iter.dataset), 100. * batch / len(train_iter), loss.item()))
                # loss.data[0]))  # / len(in_train)))

        print('====> Epoch: {} Average loss: {:.4f}'.format(
            epoch, train_loss / len(train_iter.dataset)))


def run(params):

    args = candle.ArgumentStruct(**params)
    seed = args.rng_seed
    candle.set_seed(seed)

    # Construct extension to save model
    ext = p1b1.extension_from_parameters(params, '.pytorch')
    candle.verify_path(params['save_path'])
    prefix = '{}{}'.format(params['save_path'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix + '.log'
    candle.set_up_logger(logfile, p1b1.logger, params['verbose'])
    p1b1.logger.info('Params: {}'.format(params))

    # Get default parameters for initialization and optimizer functions
    keras_defaults = candle.keras_default_config()

    # Load dataset
    x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = p1b1.load_data(params, seed)

    # cache_file = 'data_l1000_cache.h5'
    # save_cache(cache_file, x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels)
    # x_train, y_train, x_val, y_val, x_test, y_test, x_labels, y_labels = load_cache(cache_file)

    p1b1.logger.info("Shape x_train: {}".format(x_train.shape))
    p1b1.logger.info("Shape x_val: {}".format(x_val.shape))
    p1b1.logger.info("Shape x_test: {}".format(x_test.shape))

    p1b1.logger.info("Range x_train: [{:.3g}, {:.3g}]".format(np.min(x_train), np.max(x_train)))
    p1b1.logger.info("Range x_val: [{:.3g}, {:.3g}]".format(np.min(x_val), np.max(x_val)))
    p1b1.logger.info("Range x_test: [{:.3g}, {:.3g}]".format(np.min(x_test), np.max(x_test)))

    p1b1.logger.debug('Class labels')
    for i, label in enumerate(y_labels):
        p1b1.logger.debug('  {}: {}'.format(i, label))

    # clf = build_type_classifier(x_train, y_train, x_val, y_val)

    n_classes = len(y_labels)
    cond_train = y_train
    cond_val = y_val
    cond_test = y_test

    input_dim = x_train.shape[1]
    cond_dim = cond_train.shape[1]

    net = p1b1Model(params, input_dim, cond_dim, seed)
    # Display model
    print(net)
    # Train model
    fit(net, x_train, x_val, params)


def main():
    params = initialize_parameters()
    run(params)


if __name__ == '__main__':
    main()
