Skip to content

Commit f510f5b

Browse files
committed
Added P1B3 to Release01
1 parent 724295d commit f510f5b

File tree

5 files changed

+239
-627
lines changed

5 files changed

+239
-627
lines changed

Pilot1/P1B3/p1b3.py

Lines changed: 200 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,12 @@
2222
from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler
2323

2424
file_path = os.path.dirname(os.path.realpath(__file__))
25-
lib_path = os.path.abspath(os.path.join(file_path, '..', 'common'))
25+
lib_path = os.path.abspath(os.path.join(file_path, '..' ))
2626
sys.path.append(lib_path)
2727
lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
2828
sys.path.append(lib_path2)
2929

30-
import p1_common
31-
30+
import candle_keras as candle
3231

3332
logger = logging.getLogger(__name__)
3433

@@ -37,118 +36,205 @@
3736

3837
np.set_printoptions(threshold=np.nan)
3938

40-
def common_parser(parser):
41-
42-
parser.add_argument("--config_file", dest='config_file', type=str,
43-
default=os.path.join(file_path, 'p1b3_default_model.txt'),
44-
help="specify model configuration file")
45-
46-
# Parsing is split between the arguments shared with the default neon parser
47-
# and all the other options
48-
parser = p1_common.get_default_neon_parse(parser)
49-
parser = p1_common.get_p1_common_parser(parser)
50-
51-
# Arguments that are applicable just to p1b3
52-
parser = p1b3_parser(parser)
53-
54-
return parser
55-
56-
57-
def p1b3_parser(parser):
58-
59-
# Feature selection
60-
parser.add_argument("--cell_features", nargs='+',
61-
default=argparse.SUPPRESS,
62-
choices=['expression', 'mirna', 'proteome', 'all', 'categorical'],
63-
help="use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; or use 'categorical' for one-hot encoding of cell lines")
64-
parser.add_argument("--drug_features", nargs='+',
65-
default=argparse.SUPPRESS,
66-
choices=['descriptors', 'latent', 'all', 'noise'],
67-
help="use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or random features; 'descriptors','latent', 'all', 'noise'")
68-
parser.add_argument("--cell_noise_sigma", type=float,
69-
help="standard deviation of guassian noise to add to cell line features during training")
70-
# Output selection
71-
parser.add_argument("--min_logconc", type=float,
72-
default=argparse.SUPPRESS,
73-
help="min log concentration of dose response data to use: -3.0 to -7.0")
74-
parser.add_argument("--max_logconc", type=float,
75-
default=argparse.SUPPRESS,
76-
help="max log concentration of dose response data to use: -3.0 to -7.0")
77-
parser.add_argument("--subsample",
78-
default=argparse.SUPPRESS,
79-
choices=['naive_balancing', 'none'],
80-
help="dose response subsample strategy; 'none' or 'naive_balancing'")
81-
parser.add_argument("--category_cutoffs", nargs='+', type=float,
82-
default=argparse.SUPPRESS,
83-
help="list of growth cutoffs (between -1 and +1) seperating non-response and response categories")
84-
# Sample data selection
85-
parser.add_argument("--test_cell_split", type=float,
86-
default=argparse.SUPPRESS,
87-
help="cell lines to use in test; if None use predefined unseen cell lines instead of sampling cell lines used in training")
88-
# Test random model
89-
parser.add_argument("--scramble", action="store_true",
90-
default=False,
91-
help="randomly shuffle dose response data")
92-
parser.add_argument("--workers", type=int,
93-
default=WORKERS,
94-
help="number of data generator workers")
95-
96-
return parser
97-
98-
def read_config_file(file):
99-
config = configparser.ConfigParser()
100-
config.read(file)
101-
section = config.sections()
102-
fileParams = {}
103-
104-
# default config values that we assume exist
105-
fileParams['activation']=eval(config.get(section[0],'activation'))
106-
fileParams['batch_size']=eval(config.get(section[0],'batch_size'))
107-
fileParams['batch_normalization']=eval(config.get(section[0],'batch_normalization'))
108-
fileParams['category_cutoffs']=eval(config.get(section[0],'category_cutoffs'))
109-
fileParams['cell_features']=eval(config.get(section[0],'cell_features'))
110-
fileParams['drop']=eval(config.get(section[0],'drop'))
111-
fileParams['drug_features']=eval(config.get(section[0],'drug_features'))
112-
fileParams['epochs']=eval(config.get(section[0],'epochs'))
113-
fileParams['feature_subsample']=eval(config.get(section[0],'feature_subsample'))
114-
fileParams['initialization']=eval(config.get(section[0],'initialization'))
115-
fileParams['learning_rate']=eval(config.get(section[0], 'learning_rate'))
116-
fileParams['loss']=eval(config.get(section[0],'loss'))
117-
fileParams['min_logconc']=eval(config.get(section[0],'min_logconc'))
118-
fileParams['max_logconc']=eval(config.get(section[0],'max_logconc'))
119-
fileParams['optimizer']=eval(config.get(section[0],'optimizer'))
120-
# fileParams['penalty']=eval(config.get(section[0],'penalty'))
121-
fileParams['rng_seed']=eval(config.get(section[0],'rng_seed'))
122-
fileParams['scaling']=eval(config.get(section[0],'scaling'))
123-
fileParams['subsample']=eval(config.get(section[0],'subsample'))
124-
fileParams['test_cell_split']=eval(config.get(section[0],'test_cell_split'))
125-
fileParams['validation_split']=eval(config.get(section[0],'validation_split'))
126-
fileParams['cell_noise_sigma']=eval(config.get(section[0],'cell_noise_sigma'))
127-
128-
# parse the remaining values
129-
for k,v in config.items(section[0]):
130-
if not k in fileParams:
131-
fileParams[k] = eval(v)
39+
class BenchmarkP1B3(candle.Benchmark):
40+
41+
def set_locals(self):
42+
"""Functionality to set variables specific for the benchmark
43+
- required: set of required parameters for the benchmark.
44+
- additional_definitions: list of dictionaries describing the additional parameters for the
45+
benchmark.
46+
"""
13247

48+
if required is not None:
49+
self.required = set(required)
50+
if additional_definitions is not None:
51+
self.additional_definitions = additional_definitions
52+
53+
additional_definitions = [
54+
# Feature selection
55+
{'name':'cell_features',
56+
'nargs':'+',
57+
#'default':'argparse.SUPPRESS',
58+
'choices':['expression', 'mirna', 'proteome', 'all', 'categorical'],
59+
'help':'use one or more cell line feature sets: "expression", "mirna", "proteome", "all"; or use "categorical" for one-hot encoding of cell lines'},
60+
{'name':'drug_features',
61+
'nargs':'+',
62+
#'default':'argparse.SUPPRESS',
63+
'choices':['descriptors', 'latent', 'all', 'noise'],
64+
'help':"use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or random features; 'descriptors','latent', 'all', 'noise'"},
65+
{'name':'cell_noise_sigma', 'type':float,
66+
'help':"standard deviation of guassian noise to add to cell line features during training"},
67+
# Output selection
68+
{'name':'min_logconc',
69+
'type':float,
70+
#'default':'argparse.SUPPRESS',
71+
'help':"min log concentration of dose response data to use: -3.0 to -7.0"},
72+
{'name':'max_logconc',
73+
'type':float,
74+
#'default':'argparse.SUPPRESS',
75+
'help':"max log concentration of dose response data to use: -3.0 to -7.0"},
76+
{'name':'subsample',
77+
#'default':'argparse.SUPPRESS',
78+
'choices':['naive_balancing', 'none'],
79+
'help':"dose response subsample strategy; 'none' or 'naive_balancing'"},
80+
{'name':'category_cutoffs',
81+
'nargs':'+',
82+
'type':float,
83+
#'default':'argparse.SUPPRESS',
84+
'help':"list of growth cutoffs (between -1 and +1) seperating non-response and response categories"},
85+
# Sample data selection
86+
{'name':'test_cell_split',
87+
'type':float,
88+
#'default':'argparse.SUPPRESS',
89+
'help':"cell lines to use in test; if None use predefined unseen cell lines instead of sampling cell lines used in training"},
90+
# Test random model
91+
{'name':'scramble',
92+
'type': candle.str2bool,
93+
'default': False,
94+
'help':'randomly shuffle dose response data'},
95+
{'name':'workers',
96+
'type':int,
97+
'default':WORKERS,
98+
'help':'number of data generator workers'}
99+
]
100+
101+
required = [
102+
'activation',
103+
'batch_size',
104+
'batch_normalization',
105+
'category_cutoffs',
106+
'cell_features',
107+
'drop',
108+
'drug_features',
109+
'epochs',
110+
'feature_subsample',
111+
'initialization',
112+
'learning_rate',
113+
'loss',
114+
'min_logconc',
115+
'max_logconc',
116+
'optimizer',
117+
# 'penalty',
118+
'rng_seed',
119+
'scaling',
120+
'subsample',
121+
'test_cell_split',
122+
'validation_split',
123+
'cell_noise_sigma'
124+
]
125+
126+
#def common_parser(parser):
127+
#
128+
# parser.add_argument("--config_file", dest='config_file', type=str,
129+
# default=os.path.join(file_path, 'p1b3_default_model.txt'),
130+
# help="specify model configuration file")
131+
#
132+
# # Parsing is split between the arguments shared with the default neon parser
133+
# # and all the other options
134+
# parser = candle.get_default_neon_parse(parser)
135+
# parser = p1_common.get_p1_common_parser(parser)
136+
#
137+
# # Arguments that are applicable just to p1b3
138+
# parser = p1b3_parser(parser)
139+
#
140+
# return parser
141+
142+
143+
#def p1b3_parser(parser):
144+
#
145+
# # Feature selection
146+
# parser.add_argument("--cell_features", nargs='+',
147+
# default=argparse.SUPPRESS,
148+
# choices=['expression', 'mirna', 'proteome', 'all', 'categorical'],
149+
# help="use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; or use 'categorical' for one-hot encoding of cell lines")
150+
# parser.add_argument("--drug_features", nargs='+',
151+
# default=argparse.SUPPRESS,
152+
# choices=['descriptors', 'latent', 'all', 'noise'],
153+
# help="use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or random features; 'descriptors','latent', 'all', 'noise'")
154+
# parser.add_argument("--cell_noise_sigma", type=float,
155+
# help="standard deviation of guassian noise to add to cell line features during training")
156+
# # Output selection
157+
# parser.add_argument("--min_logconc", type=float,
158+
# default=argparse.SUPPRESS,
159+
# help="min log concentration of dose response data to use: -3.0 to -7.0")
160+
# parser.add_argument("--max_logconc", type=float,
161+
# default=argparse.SUPPRESS,
162+
# help="max log concentration of dose response data to use: -3.0 to -7.0")
163+
# parser.add_argument("--subsample",
164+
# default=argparse.SUPPRESS,
165+
# choices=['naive_balancing', 'none'],
166+
# help="dose response subsample strategy; 'none' or 'naive_balancing'")
167+
# parser.add_argument("--category_cutoffs", nargs='+', type=float,
168+
# default=argparse.SUPPRESS,
169+
# help="list of growth cutoffs (between -1 and +1) seperating non-response and response categories")
170+
# # Sample data selection
171+
# parser.add_argument("--test_cell_split", type=float,
172+
# default=argparse.SUPPRESS,
173+
# help="cell lines to use in test; if None use predefined unseen cell lines instead of sampling cell lines used in training")
174+
# # Test random model
175+
# parser.add_argument("--scramble", action="store_true",
176+
# default=False,
177+
# help="randomly shuffle dose response data")
178+
# parser.add_argument("--workers", type=int,
179+
# default=WORKERS,
180+
# help="number of data generator workers")
181+
#
182+
# return parser
183+
184+
#def read_config_file(file):
185+
# config = configparser.ConfigParser()
186+
# config.read(file)
187+
# section = config.sections()
188+
# fileParams = {}
189+
#
190+
# # default config values that we assume exist
191+
# fileParams['activation']=eval(config.get(section[0],'activation'))
192+
# fileParams['batch_size']=eval(config.get(section[0],'batch_size'))
193+
# fileParams['batch_normalization']=eval(config.get(section[0],'batch_normalization'))
194+
# fileParams['category_cutoffs']=eval(config.get(section[0],'category_cutoffs'))
195+
# fileParams['cell_features']=eval(config.get(section[0],'cell_features'))
196+
# fileParams['drop']=eval(config.get(section[0],'drop'))
197+
# fileParams['drug_features']=eval(config.get(section[0],'drug_features'))
198+
# fileParams['epochs']=eval(config.get(section[0],'epochs'))
199+
# fileParams['feature_subsample']=eval(config.get(section[0],'feature_subsample'))
200+
# fileParams['initialization']=eval(config.get(section[0],'initialization'))
201+
# fileParams['learning_rate']=eval(config.get(section[0], 'learning_rate'))
202+
# fileParams['loss']=eval(config.get(section[0],'loss'))
203+
# fileParams['min_logconc']=eval(config.get(section[0],'min_logconc'))
204+
# fileParams['max_logconc']=eval(config.get(section[0],'max_logconc'))
205+
# fileParams['optimizer']=eval(config.get(section[0],'optimizer'))
206+
## fileParams['penalty']=eval(config.get(section[0],'penalty'))
207+
# fileParams['rng_seed']=eval(config.get(section[0],'rng_seed'))
208+
# fileParams['scaling']=eval(config.get(section[0],'scaling'))
209+
# fileParams['subsample']=eval(config.get(section[0],'subsample'))
210+
# fileParams['test_cell_split']=eval(config.get(section[0],'test_cell_split'))
211+
# fileParams['validation_split']=eval(config.get(section[0],'validation_split'))
212+
# fileParams['cell_noise_sigma']=eval(config.get(section[0],'cell_noise_sigma'))
213+
#
214+
# # parse the remaining values
215+
# for k,v in config.items(section[0]):
216+
# if not k in fileParams:
217+
# fileParams[k] = eval(v)
218+
#
219+
220+
def check_params(fileParams):
133221
# Accept either a dense or a convolutional layer specification;
134222
# if neither is present, report the error and re-raise
135223
try:
136-
fileParams['dense']=eval(config.get(section[0],'dense'))
137-
except configparser.NoOptionError:
138-
try:
139-
fileParams['conv']=eval(config.get(section[0],'conv'))
140-
except configparser.NoOptionError:
141-
print("Error ! No dense or conv layers specified. Wrong file !! ... exiting ")
224+
fileParams['dense']
225+
except KeyError:
226+
try:
227+
fileParams['conv']
228+
except KeyError:
229+
print("Error! No dense or conv layers specified. Wrong file !! ... exiting ")
142230
raise
143231
else:
144232
try:
145-
fileParams['pool']=eval(config.get(section[0],'pool'))
146-
except configparser.NoOptionError:
233+
fileParams['pool']
234+
except KeyError:
147235
fileParams['pool'] = None
148236
print("Warning ! No pooling specified after conv layer.")
149237

150-
return fileParams
151-
152238

153239
def extension_from_parameters(params, framework):
154240
"""Construct string for saving model with annotation of parameters"""
@@ -495,15 +581,15 @@ def load_dose_response(path, seed, dtype, min_logconc=-5., max_logconc=-5., subs
495581
def stage_data():
496582
server = 'http://ftp.mcs.anl.gov/pub/candle/public/benchmarks/P1B3/'
497583

498-
cell_expr_path = p1_common.get_p1_file(server+'P1B3_cellline_expressions.tsv')
499-
cell_mrna_path = p1_common.get_p1_file(server+'P1B3_cellline_mirna.tsv')
500-
cell_prot_path = p1_common.get_p1_file(server+'P1B3_cellline_proteome.tsv')
501-
cell_kino_path = p1_common.get_p1_file(server+'P1B3_cellline_kinome.tsv')
502-
drug_desc_path = p1_common.get_p1_file(server+'P1B3_drug_descriptors.tsv')
503-
drug_auen_path = p1_common.get_p1_file(server+'P1B3_drug_latent.csv')
504-
dose_resp_path = p1_common.get_p1_file(server+'P1B3_dose_response.csv')
505-
test_cell_path = p1_common.get_p1_file(server+'P1B3_test_celllines.txt')
506-
test_drug_path = p1_common.get_p1_file(server+'P1B3_test_drugs.txt')
584+
cell_expr_path = candle.fetch_file(server+'P1B3_cellline_expressions.tsv', 'Pilot1', untar=False)
585+
cell_mrna_path = candle.fetch_file(server+'P1B3_cellline_mirna.tsv', 'Pilot1', untar=False)
586+
cell_prot_path = candle.fetch_file(server+'P1B3_cellline_proteome.tsv', 'Pilot1', untar=False)
587+
cell_kino_path = candle.fetch_file(server+'P1B3_cellline_kinome.tsv', 'Pilot1', untar=False)
588+
drug_desc_path = candle.fetch_file(server+'P1B3_drug_descriptors.tsv', 'Pilot1', untar=False)
589+
drug_auen_path = candle.fetch_file(server+'P1B3_drug_latent.csv', 'Pilot1', untar=False)
590+
dose_resp_path = candle.fetch_file(server+'P1B3_dose_response.csv', 'Pilot1', untar=False)
591+
test_cell_path = candle.fetch_file(server+'P1B3_test_celllines.txt', 'Pilot1', untar=False)
592+
test_drug_path = candle.fetch_file(server+'P1B3_test_drugs.txt', 'Pilot1', untar=False)
507593

508594
return(cell_expr_path, cell_mrna_path, cell_prot_path, cell_kino_path,
509595
drug_desc_path, drug_auen_path, dose_resp_path, test_cell_path,

0 commit comments

Comments
 (0)