@@ -1,8 +1,23 @@
 from __future__ import print_function
+
 import numpy as np
+
+from keras import backend as K
+
+'''
+from keras.layers import Input, Dense, Dropout, Activation
+from keras.optimizers import SGD, Adam, RMSprop
+from keras.models import Model
+from keras.callbacks import ModelCheckpoint, CSVLogger, ReduceLROnPlateau
+
+from sklearn.metrics import f1_score
+'''
+
+import p3b3 as bmk
+import candle_keras as candle
+
+
 import os, sys, gzip
-import urllib, zipfile
-TIMEOUT = 1800  # in sec; set this to -1 for no timeout
 
 import keras
 from keras import backend as K
@@ -13,37 +28,35 @@
 from keras.layers import Input
 from keras.models import Model
 
-from sklearn.metrics import f1_score
+import keras_mt_shared_cnn
 
 import argparse
 
-import p3b3
-import p3_common as p3c
-import p3_common_keras as p3ck
-from solr_keras import CandleRemoteMonitor, compute_trainable_params, TerminateOnTimeOut
-
-import keras_mt_shared_cnn
-
 
 
-def get_p3b3_parser():
-    parser = argparse.ArgumentParser(prog='p3b3_baseline',
-                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-                                     description='Multi-task CNN for data extraction from clinical reports - Pilot 3 Benchmark 3')
+def initialize_parameters():
 
-    return p3b3.common_parser(parser)
+    # Build benchmark object
+    p3b3Bmk = bmk.BenchmarkP3B3(bmk.file_path, 'p3b3_default_model.txt', 'keras',
+                                prog='p3b3_baseline', desc='Multi-task CNN for data extraction from clinical reports - Pilot 3 Benchmark 3')
+
+    # Initialize parameters
+    gParameters = candle.initialize_parameters(p3b3Bmk)
+    # bmk.logger.info('Params: {}'.format(gParameters))
 
-def initialize_parameters():
-    parser = get_p3b3_parser()
-    args = parser.parse_args()
-    print('Args', args)
+    return gParameters
 
-    GP = p3b3.read_config_file(args.config_file)
-    print(GP)
 
-    GP = p3c.args_overwrite_config(args, GP)
-    return GP
+def fetch_data(gParameters):
+    """ Downloads and decompresses the data if not locally available.
+        Since the training data depends on the model definition, it is not loaded here;
+        instead, the local path where the raw data resides is returned.
+    """
 
+    path = gParameters['data_url']
+    fpath = candle.fetch_file(path + gParameters['train_data'], 'Pilot3', untar=True)
+
+    return fpath
 
 
 def run_cnn(GP, train_x, train_y, test_x, test_y,
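Note on the hunk above: the old argparse plumbing (`get_p3b3_parser`, `read_config_file`, `args_overwrite_config`) collapses into a single CANDLE benchmark object. A minimal sketch of how the two new helpers compose at startup, using only names from this diff; the comment about the archive contents is inferred from the `np.load` calls later in the diff:

```python
# Sketch of the new CANDLE-style startup flow (names taken from this diff).
gParameters = initialize_parameters()   # builds bmk.BenchmarkP3B3 and merges
                                        # p3b3_default_model.txt with CLI overrides
fpath = fetch_data(gParameters)         # fetches data_url + train_data, untars it,
                                        # and returns the local directory
# fpath is assumed to contain train_X.npy / train_Y.npy / test_X.npy / test_Y.npy,
# which run(gParameters, fpath) loads further down in this diff.
```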
@@ -73,8 +86,8 @@ def run_cnn(GP, train_x, train_y, test_x, test_y,
     num_classes.append(np.max(train_y[:, 3]) + 1)
 
 
-    kerasDefaults = p3c.keras_default_config()
-    optimizer = p3ck.build_optimizer(optimizer, learning_rate, kerasDefaults)
+    kerasDefaults = candle.keras_default_config()
+    optimizer_run = candle.build_optimizer(optimizer, learning_rate, kerasDefaults)
 
 
     cnn = keras_mt_shared_cnn.init_export_network(
@@ -87,7 +100,7 @@ def run_cnn(GP, train_x, train_y, test_x, test_y,
         concat_dropout_prob=dropout,
         emb_l2=emb_l2,
         w_l2=w_l2,
-        optimizer=optimizer)
+        optimizer=optimizer_run)
 
     print(cnn.summary())
 
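The `optimizer_run` rename in this hunk and the previous one is more than cosmetic: the old code rebound the name `optimizer`, so the string hyperparameter (e.g. `'sgd'`) was overwritten by the Keras optimizer object that `build_optimizer` returned. A hedged sketch of the corrected pattern; the `'sgd'` and `0.01` values are assumed examples, not defaults from this benchmark:

```python
import candle_keras as candle

optimizer = 'sgd'        # config string stays intact (assumed example value)
learning_rate = 0.01     # assumed example value
kerasDefaults = candle.keras_default_config()

# The constructed Keras optimizer object gets its own name instead of
# clobbering the string; init_export_network() then compiles with it.
optimizer_run = candle.build_optimizer(optimizer, learning_rate, kerasDefaults)
```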
@@ -97,8 +110,11 @@ def run_cnn(GP, train_x, train_y, test_x, test_y,
                   'Dense2': test_y[:, 2],
                   'Dense3': test_y[:, 3]})
 
-    candleRemoteMonitor = CandleRemoteMonitor(params=GP)
-    timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
+    # candleRemoteMonitor = CandleRemoteMonitor(params=GP)
+    # timeoutMonitor = TerminateOnTimeOut(TIMEOUT)
+
+    candleRemoteMonitor = candle.CandleRemoteMonitor(params=GP)
+    timeoutMonitor = candle.TerminateOnTimeOut(GP['timeout'])
 
     history = cnn.fit(
         x=np.array(train_x),
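Note on the callback hunk above: both monitors now come from the `candle_keras` package, and the timeout is read from the run configuration (`GP['timeout']`) rather than the deleted module-level `TIMEOUT` constant, so it can be tuned per run without editing the script. A sketch of the intent; `GP` stands in for the dict returned by `initialize_parameters()`, and the `callbacks=` hand-off is an assumption, since the `cnn.fit(...)` call is truncated in this diff:

```python
import candle_keras as candle

def build_monitors(GP):
    """Sketch: construct the two CANDLE callbacks from the run config."""
    candleRemoteMonitor = candle.CandleRemoteMonitor(params=GP)
    timeoutMonitor = candle.TerminateOnTimeOut(GP['timeout'])  # timeout in seconds
    return [candleRemoteMonitor, timeoutMonitor]  # assumed: passed to cnn.fit(callbacks=...)
```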
@@ -116,71 +132,72 @@ def run_cnn(GP, train_x, train_y, test_x, test_y,
     return history
 
 
-def run(GP):
-    filter_sizes = []
-    num_filters = []
 
-    start = GP['filter_sizes']
-    end = start + GP['filter_sets']
-    n_filters = GP['num_filters']
-    for k in range(start, end):
-        filter_sizes.append(k)
-        num_filters.append(n_filters)
+def run(gParameters, fpath):
 
-    learning_rate = GP['learning_rate']
-    batch_size = GP['batch_size']
-    epochs = GP['epochs']
-    dropout = GP['dropout']
-    optimizer = GP['optimizer']
+    # Get default parameters for initialization and optimizer functions
+    kerasDefaults = candle.keras_default_config()
 
-    wv_len = GP['wv_len']
-    emb_l2 = GP['emb_l2']
-    w_l2 = GP['w_l2']
+    learning_rate = gParameters['learning_rate']
+    batch_size = gParameters['batch_size']
+    epochs = gParameters['epochs']
+    dropout = gParameters['dropout']
+    optimizer = gParameters['optimizer']
+    wv_len = gParameters['wv_len']
+    filter_sizes = gParameters['filter_sizes']
+    filter_sets = gParameters['filter_sets']
+    num_filters = gParameters['num_filters']
+    emb_l2 = gParameters['emb_l2']
+    w_l2 = gParameters['w_l2']
 
-
-    '''
-    ## Read files
-    file_path = os.path.dirname(os.path.realpath(__file__))
-    print(file_path)
-    lib_path = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
-    sys.path.append(lib_path)
 
-    from data_utils import get_file
-    origin = 'http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P3B1/P3B1_data.tar.gz'
-    data_set = 'P3B1_data'
-    data_path = get_file(data_set, origin, untar=True, md5_hash=None, cache_subdir='P3B1')
+    train_x = np.load(fpath + '/train_X.npy')
+    train_y = np.load(fpath + '/train_Y.npy')
+    test_x = np.load(fpath + '/test_X.npy')
+    test_y = np.load(fpath + '/test_Y.npy')
 
-    print('Data downloaded and stored at: ' + os.path.dirname(data_path))
-    print('Data path:' + data_path)
-    '''
-    data_path = '/lustre/atlas/proj-shared/csc249/yoonh/Benchmarks/Data/Pilot3'
 
-    train_x = np.load(data_path + '/train_X.npy')
-    train_y = np.load(data_path + '/train_Y.npy')
-    test_x = np.load(data_path + '/test_X.npy')
-    test_y = np.load(data_path + '/test_Y.npy')
+    run_filter_sizes = []
+    run_num_filters = []
 
+    for k in range(filter_sets):
+        run_filter_sizes.append(filter_sizes + k)
+        run_num_filters.append(num_filters)
 
     ret = run_cnn(
-        GP,
+        gParameters,
         train_x, train_y, test_x, test_y,
         learning_rate=learning_rate,
         batch_size=batch_size,
         epochs=epochs,
         dropout=dropout,
         optimizer=optimizer,
         wv_len=wv_len,
-        filter_sizes=filter_sizes,
-        num_filters=num_filters,
+        filter_sizes=run_filter_sizes,
+        num_filters=run_num_filters,
        emb_l2=emb_l2,
         w_l2=w_l2
     )
 
-    print('Average loss:', str(ret.history['val_loss']))
     return ret
 
 
-if __name__ == "__main__":
-    gParameters = initialize_parameters()
-    avg_loss = run(gParameters)
+
+def main():
+
+    gParameters = initialize_parameters()
+    fpath = fetch_data(gParameters)
+    avg_loss = run(gParameters, fpath)
+    print("Return: ", avg_loss)
+
+
+
+if __name__ == '__main__':
+    main()
+    try:
+        K.clear_session()
+    except AttributeError:      # theano does not have this function
+        pass
+
+
 
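Two closing notes on the final hunk. First, the filter-list rewrite is behavior-preserving: the old `range(start, start + filter_sets)` loop and the new `range(filter_sets)` loop yield identical lists, with `filter_sizes` as the smallest convolution window and `filter_sets` consecutive widths generated from it. A worked example with assumed config values (the real ones live in `p3b3_default_model.txt`):

```python
# Assumed example values, not the benchmark defaults.
filter_sizes, filter_sets, num_filters = 3, 3, 100

run_filter_sizes = []
run_num_filters = []
for k in range(filter_sets):
    run_filter_sizes.append(filter_sizes + k)   # consecutive window widths
    run_num_filters.append(num_filters)         # same filter count per width

print(run_filter_sizes)   # [3, 4, 5]
print(run_num_filters)    # [100, 100, 100]
```

Second, the new epilogue wraps `K.clear_session()` in `try/except AttributeError` because, as the inline comment notes, the Theano backend does not provide that function; only TensorFlow-backed runs actually clear the session.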