Skip to content

Commit 2ad5fd4

Browse files
authored
Update p3b3_baseline_keras2.py
1 parent 7330502 commit 2ad5fd4

File tree

1 file changed

+88
-71
lines changed

1 file changed

+88
-71
lines changed
Lines changed: 88 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,23 @@
11
from __future__ import print_function
2+
23
import numpy as np
4+
5+
from keras import backend as K
6+
7+
'''
8+
from keras.layers import Input, Dense, Dropout, Activation
9+
from keras.optimizers import SGD, Adam, RMSprop
10+
from keras.models import Model
11+
from keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau
12+
13+
from sklearn.metrics import f1_score
14+
'''
15+
16+
import p3b3 as bmk
17+
import candle_keras as candle
18+
19+
320
import os, sys, gzip
4-
import urllib, zipfile
5-
TIMEOUT=1800 # in sec; set this to -1 for no timeout
621

722
import keras
823
from keras import backend as K
@@ -13,37 +28,35 @@
1328
from keras.layers import Input
1429
from keras.models import Model
1530

16-
from sklearn.metrics import f1_score
31+
import keras_mt_shared_cnn
1732

1833
import argparse
1934

20-
import p3b3
21-
import p3_common as p3c
22-
import p3_common_keras as p3ck
23-
from solr_keras import CandleRemoteMonitor, compute_trainable_params, TerminateOnTimeOut
24-
25-
import keras_mt_shared_cnn
26-
2735

2836

29-
def get_p3b3_parser():
30-
parser = argparse.ArgumentParser(prog='p3b3_baseline',
31-
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
32-
description='Multi-task CNN for data extraction from clinical reports - Pilot 3 Benchmark 3')
37+
def initialize_parameters():
    """Build the P3B3 CANDLE benchmark and resolve its run parameters.

    Returns:
        dict: the merged default/config/command-line parameters
        (``gParameters``) produced by the CANDLE framework.
    """
    # Benchmark object built from the default model description file.
    benchmark = bmk.BenchmarkP3B3(
        bmk.file_path, 'p3b3_default_model.txt', 'keras',
        prog='p3b3_baseline',
        desc='Multi-task CNN for data extraction from clinical reports - Pilot 3 Benchmark 3')

    # Let CANDLE merge defaults, config file and CLI arguments.
    gParameters = candle.initialize_parameters(benchmark)
    # bmk.logger.info('Params: {}'.format(gParameters))

    return gParameters
4048

41-
GP=p3b3.read_config_file(args.config_file)
42-
print(GP)
4349

44-
GP = p3c.args_overwrite_config(args, GP)
45-
return GP
50+
def fetch_data(gParameters):
    """Download and decompress the data if it is not locally available.

    Since the training data depends on the model definition it is not
    loaded here; instead the local path where the raw data resides is
    returned.

    Args:
        gParameters (dict): must contain 'data_url' and 'train_data'.

    Returns:
        str: local directory holding the decompressed data.
    """
    remote = gParameters['data_url'] + gParameters['train_data']
    return candle.fetch_file(remote, 'Pilot3', untar=True)
4760

4861

4962
def run_cnn( GP, train_x, train_y, test_x, test_y,
@@ -73,8 +86,8 @@ def run_cnn( GP, train_x, train_y, test_x, test_y,
7386
num_classes.append( np.max( train_y[ :, 3 ] ) + 1 )
7487

7588

76-
kerasDefaults = p3c.keras_default_config()
77-
optimizer = p3ck.build_optimizer( optimizer, learning_rate, kerasDefaults )
89+
kerasDefaults = candle.keras_default_config()
90+
optimizer_run = candle.build_optimizer( optimizer, learning_rate, kerasDefaults )
7891

7992

8093
cnn = keras_mt_shared_cnn.init_export_network(
@@ -87,7 +100,7 @@ def run_cnn( GP, train_x, train_y, test_x, test_y,
87100
concat_dropout_prob = dropout,
88101
emb_l2= emb_l2,
89102
w_l2= w_l2,
90-
optimizer= optimizer )
103+
optimizer= optimizer_run )
91104

92105
print( cnn.summary() )
93106

@@ -97,8 +110,11 @@ def run_cnn( GP, train_x, train_y, test_x, test_y,
97110
'Dense2': test_y[ :, 2 ],
98111
'Dense3': test_y[ :, 3 ] } )
99112

100-
candleRemoteMonitor = CandleRemoteMonitor(params= GP)
101-
timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
113+
# candleRemoteMonitor = CandleRemoteMonitor(params= GP)
114+
# timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
115+
116+
candleRemoteMonitor = candle.CandleRemoteMonitor( params= GP )
117+
timeoutMonitor = candle.TerminateOnTimeOut( GP[ 'timeout' ] )
102118

103119
history = cnn.fit(
104120
x= np.array( train_x ),
@@ -116,71 +132,72 @@ def run_cnn( GP, train_x, train_y, test_x, test_y,
116132
return history
117133

118134

119-
def run( GP ):
120-
filter_sizes = []
121-
num_filters = []
122135

123-
start = GP[ 'filter_sizes' ]
124-
end = start + GP[ 'filter_sets' ]
125-
n_filters = GP[ 'num_filters' ]
126-
for k in range( start, end ):
127-
filter_sizes.append( k )
128-
num_filters.append( n_filters )
136+
def run(gParameters, fpath):
    """Train the multi-task CNN benchmark on the data staged at *fpath*.

    Args:
        gParameters (dict): resolved benchmark parameters
            (see initialize_parameters()).
        fpath (str): local directory containing train_X/train_Y/test_X/test_Y
            .npy arrays.

    Returns:
        The Keras History object returned by run_cnn().
    """
    # NOTE(review): the original computed candle.keras_default_config()
    # here but never used the result (run_cnn builds its own); removed
    # as dead code.
    learning_rate = gParameters['learning_rate']
    batch_size = gParameters['batch_size']
    epochs = gParameters['epochs']
    dropout = gParameters['dropout']
    optimizer = gParameters['optimizer']
    wv_len = gParameters['wv_len']
    filter_sizes = gParameters['filter_sizes']
    filter_sets = gParameters['filter_sets']
    num_filters = gParameters['num_filters']
    emb_l2 = gParameters['emb_l2']
    w_l2 = gParameters['w_l2']

    train_x = np.load(fpath + '/train_X.npy')
    train_y = np.load(fpath + '/train_Y.npy')
    test_x = np.load(fpath + '/test_X.npy')
    test_y = np.load(fpath + '/test_Y.npy')

    # One filter set per offset: window sizes grow by 1 starting at
    # filter_sizes, every set reusing the same filter count.
    run_filter_sizes = [filter_sizes + k for k in range(filter_sets)]
    run_num_filters = [num_filters] * filter_sets

    ret = run_cnn(
        gParameters,
        train_x, train_y, test_x, test_y,
        learning_rate=learning_rate,
        batch_size=batch_size,
        epochs=epochs,
        dropout=dropout,
        optimizer=optimizer,
        wv_len=wv_len,
        filter_sizes=run_filter_sizes,
        num_filters=run_num_filters,
        emb_l2=emb_l2,
        w_l2=w_l2
    )

    return ret
181183

182184

183-
if __name__ == "__main__":
184-
gParameters=initialize_parameters()
185-
avg_loss = run(gParameters)
185+
186+
def main():
    """Entry point: resolve parameters, stage the data, run training."""
    params = initialize_parameters()
    data_path = fetch_data(params)
    history = run(params, data_path)
    print("Return: ", history)
192+
193+
194+
195+
if __name__ == '__main__':
    main()
    try:
        # Release the Keras/TensorFlow session state after training.
        K.clear_session()
    except AttributeError:
        # The Theano backend does not provide clear_session(); ignore.
        pass
201+
202+
186203

0 commit comments

Comments
 (0)