Skip to content

Commit 451778c

Browse files
committed
Added TC1 to Release01
1 parent f510f5b commit 451778c

File tree

2 files changed

+90
-137
lines changed

2 files changed

+90
-137
lines changed

Pilot1/TC1/tc1.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from __future__ import print_function
2+
3+
import os
4+
import sys
5+
import gzip
6+
import logging
7+
8+
file_path = os.path.dirname(os.path.realpath(__file__))
9+
lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
10+
sys.path.append(lib_path2)
11+
12+
import candle_keras as candle
13+
14+
logger = logging.getLogger(__name__)

# Extra command-line/config parameters this benchmark defines on top of
# the common CANDLE parameter set.
additional_definitions = [
    {
        'name': 'pool',
        'nargs': '+',
        'type': int,
        'help': 'network structure of shared layer',
    },
]

# Parameters that must be supplied (via config file or command line)
# before the benchmark can run.
required = [
    'data_url', 'train_data', 'test_data', 'model_name',
    'conv', 'dense', 'activation', 'out_act',
    'loss', 'optimizer', 'feature_subsample', 'metrics',
    'epochs', 'batch_size', 'drop', 'classes',
    'pool', 'save',
]
43+
44+
45+
class BenchmarkTC1(candle.Benchmark):
    """CANDLE benchmark wrapper for TC1 (Pilot 1)."""

    def set_locals(self):
        """Functionality to set variables specific for the benchmark
        - required: set of required parameters for the benchmark.
        - additional_definitions: list of dictionaries describing the additional parameters for the
        benchmark.
        """
        # Pull the module-level definitions into this instance; each is
        # guarded independently so a missing one leaves the attribute unset.
        if additional_definitions is not None:
            self.additional_definitions = additional_definitions
        if required is not None:
            self.required = set(required)
58+
59+
60+
def load_data(params):
    """Fetch the benchmark's train/test files and load them via CANDLE.

    params -- parameter dict; reads 'data_url', 'train_data', 'test_data',
    'feature_subsample', 'classes' and 'datatype'.
    Returns whatever candle.load_Xy_data_noheader produces (presumably
    X/Y train/test arrays — confirm against candle_keras).
    """
    base_url = params['data_url']
    train_path = candle.fetch_file(base_url + params['train_data'], 'Pilot1')
    test_path = candle.fetch_file(base_url + params['test_data'], 'Pilot1')

    # A positive feature_subsample restricts loading to the first N columns.
    n_features = params['feature_subsample']
    usecols = list(range(n_features)) if n_features > 0 else None

    # NOTE(review): 'datatype' is read here but is not in the module-level
    # `required` list — confirm the framework supplies a default for it.
    return candle.load_Xy_data_noheader(train_path, test_path,
                                        params['classes'], usecols,
                                        scaling='maxabs',
                                        dtype=params['datatype'])

Pilot1/TC1/tc1_baseline_keras2.py

Lines changed: 18 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from __future__ import print_function
2+
13
import pandas as pd
24
import numpy as np
35
import os
@@ -11,7 +13,7 @@
1113

1214
from keras import backend as K
1315

14-
from keras.layers import Input, Dense, Dropout, Activation, Conv1D, MaxPooling1D, Flatten, LocallyConnected1D
16+
from keras.layers import Input, Dense, Dropout, Activation, Conv1D, MaxPooling1D, Flatten
1517
from keras.optimizers import SGD, Adam, RMSprop
1618
from keras.models import Sequential, Model, model_from_json, model_from_yaml
1719
from keras.utils import np_utils
@@ -21,135 +23,29 @@
2123
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
2224

2325
file_path = os.path.dirname(os.path.realpath(__file__))
24-
lib_path = os.path.abspath(os.path.join(file_path, '..', 'common'))
25-
sys.path.append(lib_path)
2626
lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
2727
sys.path.append(lib_path2)
2828

29-
import data_utils
30-
import p1_common
31-
32-
#EPOCH = 400
33-
#BATCH = 20
34-
#CLASSES = 2
35-
36-
#PL = 60484 # 1 + 60483 these are the width of the RNAseq datasets
37-
#P = 60483 # 60483
38-
#DR = 0.1 # Dropout rate
39-
40-
def common_parser(parser):
41-
42-
parser.add_argument("--config_file", dest='config_file', type=str,
43-
default=os.path.join(file_path, 'tc1_default_model.txt'),
44-
help="specify model configuration file")
45-
46-
# Parse has been split between arguments that are common with the default neon parser
47-
# and all the other options
48-
parser = p1_common.get_default_neon_parse(parser)
49-
parser = p1_common.get_p1_common_parser(parser)
50-
51-
return parser
52-
53-
def get_tc1_parser():
54-
55-
parser = argparse.ArgumentParser(prog='tc1_baseline', formatter_class=argparse.ArgumentDefaultsHelpFormatter,
56-
description='Train Autoencoder - Pilot 1 Benchmark 1')
29+
import tc1 as bmk
30+
import candle_keras as candle
5731

58-
return common_parser(parser)
59-
60-
def read_config_file(file):
61-
config=configparser.ConfigParser()
62-
config.read(file)
63-
section=config.sections()
64-
fileParams={}
65-
66-
fileParams['data_url']=eval(config.get(section[0],'data_url'))
67-
fileParams['train_data']=eval(config.get(section[0],'train_data'))
68-
fileParams['test_data']=eval(config.get(section[0],'test_data'))
69-
fileParams['model_name']=eval(config.get(section[0],'model_name'))
70-
fileParams['conv']=eval(config.get(section[0],'conv'))
71-
fileParams['dense']=eval(config.get(section[0],'dense'))
72-
fileParams['activation']=eval(config.get(section[0],'activation'))
73-
fileParams['out_act']=eval(config.get(section[0],'out_act'))
74-
fileParams['loss']=eval(config.get(section[0],'loss'))
75-
fileParams['optimizer']=eval(config.get(section[0],'optimizer'))
76-
fileParams['feature_subsample']=eval(config.get(section[0],'feature_subsample'))
77-
fileParams['metrics']=eval(config.get(section[0],'metrics'))
78-
fileParams['epochs']=eval(config.get(section[0],'epochs'))
79-
fileParams['batch_size']=eval(config.get(section[0],'batch_size'))
80-
fileParams['drop']=eval(config.get(section[0],'drop'))
81-
fileParams['classes']=eval(config.get(section[0],'classes'))
82-
fileParams['pool']=eval(config.get(section[0],'pool'))
83-
fileParams['save']=eval(config.get(section[0], 'save'))
84-
85-
return fileParams
8632

8733
def initialize_parameters():
    """Build the TC1 benchmark object and return the consolidated
    parameter dictionary (defaults from tc1_default_model.txt merged
    with any command-line overrides by the CANDLE framework)."""

    # Build benchmark object.
    # Fix: the original description was copy-pasted from the Pilot 3 P3B1
    # benchmark ("Multi-task (DNN) for data extraction from clinical
    # reports - Pilot 3 Benchmark 1"); this is the Pilot 1 TC1 benchmark.
    tc1Bmk = bmk.BenchmarkTC1(file_path, 'tc1_default_model.txt', 'keras',
                              prog='tc1_baseline',
                              desc='Tumor classifier (1D CNN) - Pilot 1 Benchmark TC1')

    # Initialize parameters
    gParameters = candle.initialize_parameters(tc1Bmk)

    return gParameters
13944

14045

14146
def run(gParameters):
14247

143-
print ('Params:', gParameters)
144-
145-
file_train = gParameters['train_data']
146-
file_test = gParameters['test_data']
147-
url = gParameters['data_url']
148-
149-
train_file = data_utils.get_file(file_train, url+file_train, cache_subdir='Pilot1')
150-
test_file = data_utils.get_file(file_test, url+file_test, cache_subdir='Pilot1')
151-
152-
X_train, Y_train, X_test, Y_test = load_data(train_file, test_file, gParameters)
48+
X_train, Y_train, X_test, Y_test = bmk.load_data(gParameters)
15349

15450
print('X_train shape:', X_train.shape)
15551
print('X_test shape:', X_test.shape)
@@ -169,6 +65,7 @@ def run(gParameters):
16965

17066
model = Sequential()
17167
dense_first = True
68+
17269
layer_list = list(range(0, len(gParameters['conv']), 3))
17370
for l, i in enumerate(layer_list):
17471
filters = gParameters['conv'][i]
@@ -212,26 +109,8 @@ def run(gParameters):
212109
model.add(Flatten())
213110

214111
model.add(Dense(gParameters['classes']))
215-
216112
model.add(Activation(gParameters['out_act']))
217113

218-
#Reference case
219-
#model.add(Conv1D(filters=128, kernel_size=20, strides=1, padding='valid', input_shape=(P, 1)))
220-
#model.add(Activation('relu'))
221-
#model.add(MaxPooling1D(pool_size=1))
222-
#model.add(Conv1D(filters=128, kernel_size=10, strides=1, padding='valid'))
223-
#model.add(Activation('relu'))
224-
#model.add(MaxPooling1D(pool_size=10))
225-
#model.add(Flatten())
226-
#model.add(Dense(200))
227-
#model.add(Activation('relu'))
228-
#model.add(Dropout(0.1))
229-
#model.add(Dense(20))
230-
#model.add(Activation('relu'))
231-
#model.add(Dropout(0.1))
232-
#model.add(Dense(CLASSES))
233-
#model.add(Activation('softmax'))
234-
235114
model.summary()
236115

237116
model.compile(loss=gParameters['loss'],
@@ -241,8 +120,8 @@ def run(gParameters):
241120
output_dir = gParameters['save']
242121
if not os.path.exists(output_dir):
243122
os.makedirs(output_dir)
244-
# set up a bunch of callbacks to do work during model training..
245-
123+
124+
# set up callbacks to do work during model training..
246125
model_name = gParameters['model_name']
247126
path = '{}/{}.autosave.model.h5'.format(output_dir, model_name)
248127
checkpointer = ModelCheckpoint(filepath=path, verbose=1, save_weights_only=False, save_best_only=True)
@@ -324,6 +203,7 @@ def run(gParameters):
324203

325204
return history
326205

206+
327207
def main():
328208

329209
gParameters = initialize_parameters()
@@ -335,3 +215,4 @@ def main():
335215
K.clear_session()
336216
except AttributeError: # theano does not have this function
337217
pass
218+

0 commit comments

Comments
 (0)