
Commit f8a964c

Added initial version of the Pilot 2 Benchmark 2 keras code.
1 parent 1a29925 commit f8a964c

File tree: P2B2/__init__.py · P2B2/__main__.py · P2B2/candle_helper_functions.py

3 files changed: +388 -0 lines changed

P2B2/__init__.py

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
print('import candle_rnn')
import os, sys
HOME = os.environ['HOME']
sys.path.append('%s/Work/Python/Git_Folder/caffe-tools/Newkeras/keras' % HOME)

P2B2/__main__.py

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
import theano
import numpy as np
import scipy as sp
import pickle
import sys, os
import glob
import optparse
import matplotlib
matplotlib.use('TkAgg')
import pylab as py
py.ion()
HOME = os.environ['HOME']

def parse_list(option, opt, value, parser):
    setattr(parser.values, option.dest, value.split(','))

if __name__ == "__main__":
    ### Hyperparameters and model save path
    parser = optparse.OptionParser()
    parser.add_option("--train", action="store_true", dest="train_bool", default=False, help="Invoke training")
    parser.add_option("--learning-rate", help="learning rate", dest="learning_rate", type=float, default=0.1)
    parser.add_option("--noise-factor", help="noise", dest="noise_factor", type=float, default=0.0)
    parser.add_option("--cool", action="store_true", dest="cool", default=False, help="Cool the learning rate")
    parser.add_option("--epochs", help="epochs", dest="epochs", type=int, default=1)
    parser.add_option("--batch-size", help="batch size", dest="batch_size", type=int, default=1)
    parser.add_option("--look-back", help="look-back time window", dest="look_back", type=int, default=1)
    parser.add_option("--home-dir", help="Home directory", dest="home_dir", type=str, default='/Users/talathi1/Work/Python/Git_Folder/caffe-tools/keras')
    parser.add_option("--save-dir", help="Save directory", dest="save_path", type=str, default=None)
    parser.add_option("--model-file", help="Trained model pickle file", dest="weight_path", type=str, default=None)
    parser.add_option("--memo", help="Memo", dest="base_memo", type=str, default=None)
    parser.add_option("--seed", action="store_true", dest="seed", default=False, help="Use a fixed random seed")
    (opts, args) = parser.parse_args()
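
    ## Example invocation (a sketch; all flag values and paths below are
    ## hypothetical, and --home-dir must point at a local keras checkout):
    ##   python -m P2B2 --train --epochs 10 --batch-size 32 --look-back 5 \
    ##       --learning-rate 0.01 --cool --save-dir /tmp/p2b2 --memo test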

    ## Set the seed
    if opts.seed:
        np.random.seed(7)
    else:
        np.random.seed(np.random.randint(10000))

    ## Set paths
    if not os.path.isdir(opts.home_dir):
        print('Keras home directory not set')
        sys.exit(0)
    sys.path.append(opts.home_dir)

    import candle_helper_functions as hf
    reload(hf)
    maps = hf.autoencoder_preprocess()

    ## Import keras modules
    from keras.optimizers import SGD, RMSprop, Adam
    from keras.datasets import mnist
    from keras.callbacks import LearningRateScheduler, ModelCheckpoint
    from keras.regularizers import l2, WeightRegularizer
    from keras import callbacks
    from keras.layers.advanced_activations import ELU
    from keras.preprocessing.image import ImageDataGenerator

    batch_size = opts.batch_size
    ##### Read Data ########
    print('Reading Data...')
    data_file = '%s/Research/DeepLearning/ECP CANDLE/Benchmarks/Benchmarks.git/P2B2/sim-numpy.npy' % HOME  ### could also be read from the command line
    print('Data File: %s' % data_file)
    print('Data Format: [Num Samples, Num Molecules, Num Atoms, Position]')

    X = np.load(data_file)  ### Data is: Samples, Molecules, Atoms, x-pos, y-pos, z-pos
    ## Take center of mass for atoms:
    X_A = X.mean(axis=2)  ## Data is: Samples, Molecules, x-pos, y-pos, z-pos
    data = X_A[0:-1, :, 2]  ## only consider the z-dimension
    X_train, y_train = hf.create_dataset(data, opts.look_back, look_forward=1)  ## convert data to a sequence
    temporal_dim = X_train.shape[1]
    input_dim = X_train.shape[2]
    ## weight only the first forecast step in the temporal loss
    subset_sample_weight = np.ones((X_train.shape[0], 1))
    sample_weight = np.zeros((X_train.shape[0], opts.look_back))
    sample_weight[:, 0:1] = subset_sample_weight

    print('X_train type and shape:', X_train.dtype, X_train.shape)
    print('X_train.min():', X_train.min())
    print('X_train.max():', X_train.max())

    ### Define Model, Solver and Compile ##########
    print('Define the model and compile')
    opt = Adam(lr=opts.learning_rate)

    print('using mlp network')
    model_type = 'mlp'
    hidden_layers = [512, 256, 128, 64, 32, 16]
    recurrent_layers = [16, 16, 16]
    ## Model is an Autoencoder-RNN network
    model = hf.rnn_dense_auto(weights_path=None, T=temporal_dim, D=input_dim, nonlinearity='relu', hidden_layers=hidden_layers, recurrent_layers=recurrent_layers)

    memo = '%s_%s_%0.5f' % (opts.base_memo, model_type, opts.learning_rate)

    print('Autoencoder Regression problem')
    model.compile(optimizer=opt, loss='mean_squared_error', sample_weight_mode="temporal")
    model.summary()  ## print a detailed model summary
    #sys.exit(0)
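
    ## Architecture sketch (under the defaults above): a TimeDistributed dense
    ## encoder 512->256->128->64->32->16 applied per time step, three stacked
    ## LSTM(16) layers, and a mirrored TimeDistributed decoder back to
    ## input_dim; the temporal sample weights defined earlier restrict the
    ## loss to the first forecast step.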

    #### Print Compact Model Configuration ###########
    # num_layers = len(model.layers)
    # print('*'*10, 'Model Configuration', '*'*10)
    # for i in range(num_layers):
    #     print(i, ': ', model.layers[i].name, ':', model.layers[i].output_shape[:])

    ### Set up for Training and Validation
    total_epochs = opts.epochs
    initial_lrate = opts.learning_rate
    if opts.cool:
        drop = 0.5
    else:
        drop = 1.0

    epochs_drop = 1 + int(np.floor(total_epochs / 3))

    def step_decay(epoch):
        global initial_lrate, epochs_drop, drop
        lrate = initial_lrate * np.power(drop, np.floor((1 + epoch) / epochs_drop))
        return lrate
    lr_scheduler = LearningRateScheduler(step_decay)
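
    ## Worked example of the cooling schedule (assuming --cool, so drop=0.5,
    ## and --epochs 9): epochs_drop = 1 + floor(9/3) = 4, so epochs 0-2 use
    ## initial_lrate, epochs 3-6 use initial_lrate/2, epochs 7-8 use initial_lrate/4.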

    #### Train the Model
    if opts.train_bool:
        history = callbacks.History()
        if opts.save_path is not None:
            model_file = '%s/%s.hdf5' % (opts.save_path, memo)
            checkpointer = ModelCheckpoint(filepath=model_file, verbose=1)
            callbacks_list = [history, lr_scheduler, checkpointer]  ## renamed so the keras.callbacks module is not shadowed
        else:
            callbacks_list = [history, lr_scheduler]
        model.fit(X_train, y_train, batch_size=batch_size, shuffle=False, nb_epoch=total_epochs, callbacks=callbacks_list, verbose=1, sample_weight=sample_weight)

        loss_data = {'train': history.history['loss']}
        if opts.save_path is not None:
            loss_file = '%s/%s.pkl' % (opts.save_path, memo)
            o = open(loss_file, 'wb')
            pickle.dump(loss_data, o)
            o.close()

    ## Generate model forecast figure
    x = X_train[0:1]
    xmod = x.reshape(x.shape[1], x.shape[2])
    yf = hf.generate_timedistributed_forecast(model, x, X_train.shape[0] + opts.look_back)
    yt = y_train[:, 0, :]
    ytn = np.vstack([xmod, yt])
    py.figure(); py.plot(ytn.mean(axis=1))
    py.hold('on'); py.plot(yf.mean(axis=1))

P2B2/candle_helper_functions.py

Lines changed: 234 additions & 0 deletions
@@ -0,0 +1,234 @@
from __future__ import absolute_import
import theano
import matplotlib
if 'MACOSX' in matplotlib.get_backend().upper():
    matplotlib.use('TkAgg')
import pylab as py
py.ion()  ## Turn on plot visualization

import gzip, pickle
import numpy as np
from PIL import Image
import cv2
import keras.backend as K
K.set_image_dim_ordering('th')
from keras.layers import Input, merge, TimeDistributed, LSTM, GRU, RepeatVector
from keras.models import Sequential, Model
from keras.layers.core import Flatten, Dense, Dropout, Activation, Reshape
from keras.initializations import normal, identity, he_normal, glorot_normal, glorot_uniform, he_uniform
from keras.layers.normalization import BatchNormalization
import threading


############# Define Data Generators ################
class ImageNoiseDataGenerator(object):
    '''Generate minibatches with realtime data augmentation.'''

    def __init__(self, corruption_level=0.5):
        self.__dict__.update(locals())
        self.p = corruption_level
        self.lock = threading.Lock()

    def _flow_index(self, N, batch_size=32, shuffle=False, seed=None):
        b = 0
        total_b = 0
        while 1:
            if b == 0:
                if seed is not None:
                    np.random.seed(seed + total_b)

                if shuffle:
                    index_array = np.random.permutation(N)
                else:
                    index_array = np.arange(N)

            current_index = (b * batch_size) % N
            if N >= current_index + batch_size:
                current_batch_size = batch_size
            else:
                current_batch_size = N - current_index

            if current_batch_size == batch_size:
                b += 1
            else:
                b = 0
            total_b += 1
            yield index_array[current_index: current_index + current_batch_size], current_index, current_batch_size

    def flow(self, X, y, batch_size=32, shuffle=False, seed=None):
        assert len(X) == len(y)
        self.X = X
        self.y = y
        self.flow_generator = self._flow_index(X.shape[0], batch_size, shuffle, seed)
        return self

    def __iter__(self):
        ## needed if we want to do something like: for x, y in data_gen.flow(...)
        return self

    def next(self):
        ## for python 2.x
        ## Keep under lock only the mechanism that advances the indexing of each
        ## batch; see http://anandology.com/blog/using-iterators-and-generators/
        with self.lock:
            index_array, current_index, current_batch_size = next(self.flow_generator)
        ## The transformation of images is not under thread lock, so it can be done in parallel
        bX = np.zeros(tuple([current_batch_size] + list(self.X.shape)[1:]))
        for i, j in enumerate(index_array):
            x = self.X[j]
            x = self.insertnoise(x, corruption_level=self.p)
            bX[i] = x
        bY = self.y[index_array]
        return bX, bY

    def __next__(self):
        ## for python 3.x
        return self.next()

    def insertnoise(self, x, corruption_level=0.5):
        return np.random.binomial(1, 1 - corruption_level, x.shape) * x
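
## Example usage (a sketch with hypothetical arrays X, y): each sample is
## multiplied elementwise by a Bernoulli(1 - corruption_level) mask, zeroing
## a random subset of entries:
##   datagen = ImageNoiseDataGenerator(corruption_level=0.3)
##   bX, bY = next(datagen.flow(X, y, batch_size=32, shuffle=True, seed=0))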


### Generate RNN compatible dataset
def create_dataset(dataset, look_back=1, look_forward=1):
    ## input is an np.array of dim (T, D);
    ## output is X: (N, look_back, D) and Y: (N, look_back, D) with entries
    ## beyond look_forward zero-padded, where N = T - look_back - look_forward
    assert(look_back >= look_forward)
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back - (look_forward - 1) - 1):
        a = dataset[i:(i + look_back), :]
        dataX.append(a)
        dataY.append(dataset[i + look_back:i + look_back + look_forward, :])
    dataX = np.array(dataX)
    dataY = np.array(dataY)
    if look_back - look_forward > 0:
        dataY_mod = np.zeros((dataY.shape[0], dataX.shape[1], dataY.shape[2]))
        dataY_mod[:, 0:dataY.shape[1], :] = dataY
    else:
        dataY_mod = dataY
    return dataX, dataY_mod
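
## Worked example (hypothetical sizes): for a dataset of shape (100, 3) with
## look_back=5 and look_forward=1, this returns X of shape (94, 5, 3) and
## Y of shape (94, 5, 3), where only Y[:, 0, :] is nonzero.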

def generate_timedistributed_forecast(model, x, prediction_length=10):
    ## to be used when the RNN is used for sequence-to-sequence mapping
    N, T, D = x.shape
    x_data = x[0, :, :].copy()
    x_revise = x.copy()
    for i in range(prediction_length):
        y_pred = model.predict(x_revise[0:1, :, :], batch_size=1)
        yf = y_pred[:, 0, :]
        x_data = np.vstack([x_data, yf])
        ## slide the input window one step and append the new prediction
        x_revise[0, 0:T-1, :] = x_revise[0, 1:T, :]
        x_revise[0, T-1:T, :] = yf
    return x_data
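
## Example (sketch): starting from one seed window of shape (1, T, D), the
## model is applied autoregressively for prediction_length steps:
##   yf = generate_timedistributed_forecast(model, X_train[0:1], prediction_length=50)
##   ## yf has shape (T + 50, D)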


##### Define Neural Network Models ###################
def simple_test_rnn(T=1, D=1):
    input_shape = (T, D)
    input_img = Input(shape=input_shape)
    encoder = TimeDistributed(Dense(20, activation='relu'))(input_img)
    rnn = LSTM(10, activation='elu', return_sequences=True, stateful=False)(encoder)
    decoder = TimeDistributed(Dense(20, activation='relu'))(rnn)
    model = Model(input=input_img, output=decoder)
    return model


def dense_auto(weights_path=None, input_shape=(784,), hidden_layers=None, nonlinearity='relu'):
    input_img = Input(shape=input_shape)

    if hidden_layers is not None:
        if type(hidden_layers) != list:
            hidden_layers = list(hidden_layers)
        for i, l in enumerate(hidden_layers):
            if i == 0:
                encoded = Dense(l, activation=nonlinearity)(input_img)
            else:
                encoded = Dense(l, activation=nonlinearity)(encoded)

        for i, l in reversed(list(enumerate(hidden_layers))):
            if i < len(hidden_layers) - 1:
                if i == len(hidden_layers) - 2:
                    decoded = Dense(l, activation=nonlinearity)(encoded)
                else:
                    decoded = Dense(l, activation=nonlinearity)(decoded)
        decoded = Dense(input_shape[0])(decoded)
    else:
        decoded = Dense(input_shape[0])(input_img)

    model = Model(input=input_img, output=decoded)

    if weights_path:
        print('Loading Model')
        model.load_weights(weights_path)
    return model

def rnn_dense_auto(weights_path=None, T=1, D=1, nonlinearity='relu', hidden_layers=None, recurrent_layers=None):
    input_shape = (T, D)
    input_img = Input(shape=input_shape)

    if hidden_layers is not None:
        if type(hidden_layers) != list:
            hidden_layers = list(hidden_layers)
        ## TimeDistributed dense encoder
        for i, l in enumerate(hidden_layers):
            if i == 0:
                encoded = TimeDistributed(Dense(l, activation=nonlinearity))(input_img)
            else:
                encoded = TimeDistributed(Dense(l, activation=nonlinearity))(encoded)

        ## stacked recurrent layers
        for i, l in enumerate(recurrent_layers):
            if i == 0:
                rnn = LSTM(l, return_sequences=True, stateful=False)(encoded)
            else:
                rnn = LSTM(l, return_sequences=True, stateful=False)(rnn)

        ## TimeDistributed dense decoder (mirror of the encoder)
        for i, l in reversed(list(enumerate(hidden_layers))):
            if i < len(hidden_layers) - 1:
                if i == len(hidden_layers) - 2:
                    decoded = TimeDistributed(Dense(l, activation=nonlinearity))(rnn)
                else:
                    decoded = TimeDistributed(Dense(l, activation=nonlinearity))(decoded)
        decoded = TimeDistributed(Dense(D))(decoded)
    else:
        decoded = TimeDistributed(Dense(D))(input_img)

    model = Model(input=input_img, output=decoded)

    if weights_path:
        print('Loading Model')
        model.load_weights(weights_path)
    return model
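
## Example (a sketch mirroring the call in __main__.py):
##   model = rnn_dense_auto(T=10, D=3, nonlinearity='relu',
##                          hidden_layers=[512, 256, 128, 64, 32, 16],
##                          recurrent_layers=[16, 16, 16])
##   model.compile(optimizer='adam', loss='mean_squared_error',
##                 sample_weight_mode='temporal')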


class autoencoder_preprocess():
    def __init__(self, img_size=(784,), noise_factor=0.):
        self.noise = noise_factor
        self.img_size = img_size
        self.lock = threading.Lock()

    def add_noise(self, X_train):
        ## Add noise to the zero entries of the input data
        np.random.seed(100)
        ind = np.where(X_train == 0)
        rn = self.noise * np.random.rand(np.shape(ind)[1])
        X_train[ind] = rn
        return X_train

    def renormalize(self, X_train, mu, sigma):
        X_train = (X_train - mu) / sigma
        X_train = X_train.astype("float32")
        return X_train

def get_activations(model, layer, X_batch):
    ## returns the activations of the given layer index for X_batch (test phase)
    get_acts = K.function([model.layers[0].input, K.learning_phase()], model.layers[layer].output)
    activations = get_acts([X_batch, 0])
    return activations
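
## Example (sketch, hypothetical model and batch): fetch the test-phase
## (learning_phase = 0) activations of layer index 2:
##   acts = get_activations(model, 2, X_batch)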
