Commit 32a2f16

Added P1B1 to Release01

1 parent 3d39cb7 commit 32a2f16

File tree

6 files changed (+135, -732 lines)

Pilot1/P1B1/p1b1.py

Lines changed: 94 additions & 100 deletions
@@ -3,11 +3,6 @@
 import os
 import sys
 import logging
-import argparse
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser
 
 import pandas as pd
 import numpy as np
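Note: the argparse import and the Python 2/3 configparser compatibility shim are removed because command-line and config-file parsing is now delegated to the candle_keras library, as the rest of this diff shows.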
@@ -17,101 +12,100 @@
 from scipy.stats.stats import pearsonr
 
 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path = os.path.abspath(os.path.join(file_path, '..', 'common'))
-sys.path.append(lib_path)
+#lib_path = os.path.abspath(os.path.join(file_path, '..'))
+#sys.path.append(lib_path)
 lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
 sys.path.append(lib_path2)
 
-import p1_common
-
-url_p1b1 = 'http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B1/'
-file_train = 'P1B1.dev.train.csv'
-file_test = 'P1B1.dev.test.csv'
+import candle_keras as candle
 
 logger = logging.getLogger(__name__)
 
-def common_parser(parser):
-
-    parser.add_argument("--config-file", dest='config_file', type=str,
-                        default=os.path.join(file_path, 'p1b1_default_model.txt'),
-                        help="specify model configuration file")
-
-    # Parse has been split between arguments that are common with the default neon parser
-    # and all the other options
-    parser = p1_common.get_default_neon_parse(parser)
-    parser = p1_common.get_p1_common_parser(parser)
-
-    # Arguments that are applicable just to p1b1
-    parser = p1b1_parser(parser)
-
-    return parser
-
-
-def p1b1_parser(parser):
-    parser.add_argument("--latent_dim", type=int,
-                        default=argparse.SUPPRESS,
-                        help="latent dimensions")
-    parser.add_argument('-m', '--model',
-                        default=argparse.SUPPRESS,
-                        help='model to use: ae, vae, cvae')
-    parser.add_argument("--use_landmark_genes", action="store_true",
-                        help="use the 978 landmark genes from LINCS (L1000) as expression features")
-    parser.add_argument("--residual", action="store_true",
-                        help="add skip connections to the layers")
-    parser.add_argument('--reduce_lr', action='store_true',
-                        help='reduce learning rate on plateau')
-    parser.add_argument('--warmup_lr', action='store_true',
-                        help='gradually increase learning rate on start')
-    parser.add_argument('--base_lr', type=float,
-                        default=None,
-                        help='base learning rate')
-    parser.add_argument("--epsilon_std", type=float,
-                        default=argparse.SUPPRESS,
-                        help="epsilon std for sampling latent noise")
-    parser.add_argument('--cp', action='store_true',
-                        help='checkpoint models with best val_loss')
-    parser.add_argument('--tb', action='store_true',
-                        help='use tensorboard')
-    parser.add_argument('--tsne', action='store_true',
-                        help='generate tsne plot of the latent representation')
-
-    return parser
-
-
-def read_config_file(file):
-    config = configparser.ConfigParser()
-    config.read(file)
-    section = config.sections()
-    file_params = {}
-    file_params['activation'] = eval(config.get(section[0], 'activation'))
-    file_params['batch_size'] = eval(config.get(section[0], 'batch_size'))
-    file_params['dense'] = eval(config.get(section[0], 'dense'))
-    file_params['drop'] = eval(config.get(section[0], 'drop'))
-    file_params['epochs'] = eval(config.get(section[0], 'epochs'))
-    file_params['initialization'] = eval(config.get(section[0], 'initialization'))
-    file_params['learning_rate'] = eval(config.get(section[0], 'learning_rate'))
-    file_params['loss'] = eval(config.get(section[0], 'loss'))
-    file_params['noise_factor'] = eval(config.get(section[0], 'noise_factor'))
-    file_params['optimizer'] = eval(config.get(section[0], 'optimizer'))
-    file_params['rng_seed'] = eval(config.get(section[0], 'rng_seed'))
-    file_params['model'] = eval(config.get(section[0], 'model'))
-    file_params['scaling'] = eval(config.get(section[0], 'scaling'))
-    file_params['validation_split'] = eval(config.get(section[0], 'validation_split'))
-    file_params['latent_dim'] = eval(config.get(section[0], 'latent_dim'))
-    file_params['feature_subsample'] = eval(config.get(section[0], 'feature_subsample'))
-    file_params['batch_normalization'] = eval(config.get(section[0], 'batch_normalization'))
-    file_params['epsilon_std'] = eval(config.get(section[0], 'epsilon_std'))
-
-    file_params['solr_root'] = eval(config.get(section[1], 'solr_root'))
-    file_params['timeout'] = eval(config.get(section[1], 'timeout'))
-
-    # parse the remaining values
-    for k, v in config.items(section[0]):
-        if not k in file_params:
-            file_params[k] = eval(v)
-
-    return file_params
-
+additional_definitions = [
+    {'name':'latent_dim',
+     'action':'store',
+     'type': int,
+     'help':'latent dimensions'},
+    {'name':'model',
+     'default':'ae',
+     'choices':['ae', 'vae', 'cvae'],
+     'help':'model to use: ae, vae, cvae'},
+    {'name':'use_landmark_genes',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'use the 978 landmark genes from LINCS (L1000) as expression features'},
+    {'name':'residual',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'add skip connections to the layers'},
+    {'name':'reduce_lr',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'reduce learning rate on plateau'},
+    {'name':'warmup_lr',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'gradually increase learning rate on start'},
+    {'name':'base_lr',
+     'type': float,
+     'help':'base learning rate'},
+    {'name':'epsilon_std',
+     'type': float,
+     'help':'epsilon std for sampling latent noise'},
+    {'name':'cp',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'checkpoint models with best val_loss'},
+    #{'name':'shuffle',
+    # 'type': candle.str2bool,
+    # 'default': False,
+    # 'help':'shuffle data'},
+    {'name':'tb',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'use tensorboard'},
+    {'name':'tsne',
+     'type': candle.str2bool,
+     'default': False,
+     'help':'generate tsne plot of the latent representation'}
+]
+
+required = [
+    'activation',
+    'batch_size',
+    'dense',
+    'drop',
+    'epochs',
+    'initialization',
+    'learning_rate',
+    'loss',
+    'noise_factor',
+    'optimizer',
+    'rng_seed',
+    'model',
+    'scaling',
+    'validation_split',
+    'latent_dim',
+    'feature_subsample',
+    'batch_normalization',
+    'epsilon_std',
+    'solr_root',
+    'timeout'
+]
+
+class BenchmarkP1B1(candle.Benchmark):
+
+    def set_locals(self):
+        """Functionality to set variables specific for the benchmark
+        - required: set of required parameters for the benchmark.
+        - additional_definitions: list of dictionaries describing the additional parameters for the
+          benchmark.
+        """
+
+        if required is not None:
+            self.required = set(required)
+        if additional_definitions is not None:
+            self.additional_definitions = additional_definitions
 
 def extension_from_parameters(params, framework=''):
     """Construct string for saving model with annotation of parameters"""
@@ -155,17 +149,17 @@ def load_data(params, seed):
 
     if params['use_landmark_genes']:
         lincs_file = 'lincs1000.tsv'
-        lincs_path = p1_common.get_p1_file(url_p1b1 + lincs_file)
+        lincs_path = candle.fetch_file(params['url_p1b1'] + lincs_file, 'Pilot1')
         df_l1000 = pd.read_csv(lincs_path, sep='\t')
         x_cols = df_l1000['gdc'].tolist()
         drop_cols = None
     else:
         x_cols = None
 
-    train_path = p1_common.get_p1_file(url_p1b1 + file_train)
-    test_path = p1_common.get_p1_file(url_p1b1 + file_test)
+    train_path = candle.fetch_file(params['url_p1b1'] + params['file_train'], 'Pilot1')
+    test_path = candle.fetch_file(params['url_p1b1'] + params['file_test'], 'Pilot1')
 
-    return p1_common.load_csv_data(train_path, test_path,
+    return candle.load_csv_data(train_path, test_path,
                                    x_cols=x_cols,
                                    y_cols=y_cols,
                                    drop_cols=drop_cols,
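Note: load_data now reads url_p1b1, file_train, and file_test from the params dict instead of the module-level constants deleted above, and candle.fetch_file caches each download under a 'Pilot1' data subdirectory. A hedged sketch, reusing the values of the deleted constants:

import candle_keras as candle

# After this commit these values are expected to arrive via the parsed
# params (e.g. from the default model config file); they are hard-coded
# here only to make the sketch self-contained.
params = {
    'url_p1b1': 'http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B1/',
    'file_train': 'P1B1.dev.train.csv',
    'file_test': 'P1B1.dev.test.csv',
}

# fetch_file (per the calls in this diff) downloads the file if needed and
# returns a local path cached under the 'Pilot1' subdirectory.
train_path = candle.fetch_file(params['url_p1b1'] + params['file_train'], 'Pilot1')
test_path = candle.fetch_file(params['url_p1b1'] + params['file_test'], 'Pilot1')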
@@ -190,14 +184,14 @@ def load_data_orig(params, seed):
 
     if params['use_landmark_genes']:
         lincs_file = 'lincs1000.tsv'
-        lincs_path = p1_common.get_p1_file(url_p1b1 + lincs_file)
+        lincs_path = candle.fetch_file(url_p1b1 + lincs_file)
         df_l1000 = pd.read_csv(lincs_path, sep='\t')
         usecols = df_l1000['gdc']
         drop_cols = None
     else:
         usecols = None
 
-    return p1_common.load_X_data(url_p1b1, file_train, file_test,
+    return candle.load_X_data(params['url_p1b1'], params['file_train'], params['file_test'],
                                  drop_cols=drop_cols,
                                  onehot_cols=onehot_cols,
                                  usecols=usecols,
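Note: unlike load_data above, load_data_orig still calls candle.fetch_file(url_p1b1 + lincs_file) with the bare name url_p1b1, yet this commit deletes that module-level constant, so the use_landmark_genes branch of load_data_orig would raise a NameError as committed. A minimal follow-up fix would mirror the load_data change:

# hypothetical fix, mirroring the params-based lookup used in load_data()
lincs_path = candle.fetch_file(params['url_p1b1'] + lincs_file, 'Pilot1')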
