Commit 724295d

Added Combo to Release01
1 parent 0ba1d75 commit 724295d

File tree (5 files changed: +106 additions, -136 deletions)

  Pilot1/Combo/NCI60.py
  Pilot1/Combo/combo.py
  Pilot1/Combo/combo_baseline_keras2.py
  Pilot1/Combo/infer.py
  Pilot1/Combo/infer_dose.py


Pilot1/Combo/NCI60.py

Lines changed: 5 additions & 6 deletions
@@ -11,11 +11,10 @@
 from sklearn.preprocessing import StandardScaler, MinMaxScaler, MaxAbsScaler

 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path = os.path.abspath(os.path.join(file_path, '..', 'common'))
+lib_path = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
 sys.path.append(lib_path)

-import p1_common
-
+import candle_keras as candle

 global_cache = {}

@@ -25,7 +24,7 @@


 def get_file(url):
-    return p1_common.get_p1_file(url)
+    return candle.fetch_file(url, 'Pilot1')


 def impute_and_scale(df, scaling='std'):
@@ -444,7 +443,7 @@ def load_sample_rnaseq(ncols=None, scaling='std', add_prefix=True, use_landmark_

     if preprocess_rnaseq and preprocess_rnaseq != 'none':
         scaling = None
-        filename += ('_' + preprocess_rnaseq) # 'scale_per_source' or 'combat'
+        filename += ('_' + preprocess_rnaseq) # 'source_scale' or 'combat'

     path = get_file(DATA_URL + filename)

@@ -489,7 +488,7 @@ def load_cell_expression_rnaseq(ncols=None, scaling='std', add_prefix=True, use_

     if preprocess_rnaseq and preprocess_rnaseq != 'none':
         scaling = None
-        filename += ('_' + preprocess_rnaseq) # 'scale_per_source' or 'combat'
+        filename += ('_' + preprocess_rnaseq) # 'source_scale' or 'combat'

     path = get_file(DATA_URL + filename)

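The structural change in this file is the extra '..' in lib_path: the shared candle_keras module is now expected in the repository-level common/ directory rather than Pilot1/common/, and data files are fetched through candle.fetch_file instead of p1_common.get_p1_file. A minimal, runnable sketch of what the path change resolves to (the checkout location below is a made-up example):

import os

# Hypothetical checkout location of Pilot1/Combo, for illustration only.
file_path = '/home/user/Benchmarks/Pilot1/Combo'

old_lib = os.path.abspath(os.path.join(file_path, '..', 'common'))        # one level up
new_lib = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))  # two levels up

print(old_lib)  # /home/user/Benchmarks/Pilot1/common
print(new_lib)  # /home/user/Benchmarks/common  <- where candle_keras now lives
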
Pilot1/Combo/combo.py

Lines changed: 86 additions & 99 deletions
@@ -4,113 +4,100 @@
 import sys
 import logging
 import argparse
-try:
-    import configparser
-except ImportError:
-    import ConfigParser as configparser

 import pandas as pd
 import numpy as np

 file_path = os.path.dirname(os.path.realpath(__file__))
-lib_path = os.path.abspath(os.path.join(file_path, '..', 'common'))
-sys.path.append(lib_path)
-# lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
-# sys.path.append(lib_path2)
+lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
+sys.path.append(lib_path2)

-import p1_common
+import candle_keras as candle

 logger = logging.getLogger(__name__)

-
-def common_parser(parser):
-    parser.add_argument("--config-file", dest='config_file', type=str,
-                        default=os.path.join(file_path, 'combo_default_model.txt'),
-                        help="specify model configuration file")
-
-    # Parse has been split between arguments that are common with the default neon parser
-    # and all the other options
-    parser = p1_common.get_default_neon_parse(parser)
-    parser = p1_common.get_p1_common_parser(parser)
-
-    # Arguments that are applicable just to combo
-    parser = combo_parser(parser)
-
-    return parser
-
-
-def combo_parser(parser):
-    parser.add_argument("--cell_features", nargs='+',
-                        default=argparse.SUPPRESS,
-                        choices=['expression', 'mirna', 'proteome', 'all', 'expression_5platform', 'expression_u133p2', 'rnaseq', 'categorical'],
-                        help="use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; use all for ['expression', 'mirna', 'proteome']; use 'categorical' for one-hot encoded cell lines")
-    parser.add_argument("--drug_features", nargs='+',
-                        default=argparse.SUPPRESS,
-                        choices=['descriptors', 'latent', 'all', 'categorical', 'noise'],
-                        help="use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or one-hot encoded drugs, or random features; 'descriptors','latent', 'all', 'categorical', 'noise'")
-    parser.add_argument('--dense_feature_layers', nargs='+', type=int,
-                        default=argparse.SUPPRESS,
-                        help='number of neurons in intermediate dense layers in the feature encoding submodels')
-    parser.add_argument("--use_landmark_genes", action="store_true",
-                        help="use the 978 landmark genes from LINCS (L1000) as expression features")
-    parser.add_argument("--preprocess_rnaseq",
-                        choices=['scale_per_source', 'combat', 'none'],
-                        help="preprocessing method for RNAseq data; none for global normalization")
-    parser.add_argument("--response_url",
-                        help="URL to combo dose response file")
-    parser.add_argument("--residual", action="store_true",
-                        help="add skip connections to the layers")
-    parser.add_argument('--reduce_lr', action='store_true',
-                        help='reduce learning rate on plateau')
-    parser.add_argument('--warmup_lr', action='store_true',
-                        help='gradually increase learning rate on start')
-    parser.add_argument('--base_lr', type=float,
-                        default=None,
-                        help='base learning rate')
-    parser.add_argument('--cp', action='store_true',
-                        help='checkpoint models with best val_loss')
-    parser.add_argument('--tb', action='store_true',
-                        help='use tensorboard')
-    parser.add_argument('--max_val_loss', type=float,
-                        default=argparse.SUPPRESS,
-                        help='retrain if val_loss is greater than the threshold')
-    parser.add_argument("--cv_partition",
-                        choices=['overlapping', 'disjoint', 'disjoint_cells'],
-                        default=argparse.SUPPRESS,
-                        help="cross validation paritioning scheme: overlapping or disjoint")
-    parser.add_argument("--cv", type=int,
-                        default=argparse.SUPPRESS,
-                        help="cross validation folds")
-    parser.add_argument("--gen", action="store_true",
-                        help="use generator for training and validation data")
-    parser.add_argument("--exclude_cells", nargs='+',
-                        default=[],
-                        help="cell line IDs to exclude")
-    parser.add_argument("--exclude_drugs", nargs='+',
-                        default=[],
-                        help="drug line IDs to exclude")
-
-    return parser
-
-
-def read_config_file(file):
-    config = configparser.ConfigParser()
-    config.read(file)
-    section = config.sections()
-
-    args = [['activation', 'batch_size', 'dense', 'dense_feature_layers', 'drop',
+additional_definitions = [
+{'name':'cell_features',
+ 'nargs':'+',
+ 'choices':['expression', 'mirna', 'proteome', 'all', 'expression_5platform', 'expression_u133p2', 'rnaseq', 'categorical'],
+ 'help':"use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; use all for ['expression', 'mirna', 'proteome']; use 'categorical' for one-hot encoded cell lines"},
+{'name':'drug_features', 'nargs':'+',
+ 'choices':['descriptors', 'latent', 'all', 'categorical', 'noise'],
+ 'help':"use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or one-hot encoded drugs, or random features; 'descriptors','latent', 'all', 'categorical', 'noise'"},
+{'name':'dense_feature_layers',
+ 'nargs':'+',
+ 'type':int,
+ 'help':'number of neurons in intermediate dense layers in the feature encoding submodels'},
+{'name':'use_landmark_genes',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':"use the 978 landmark genes from LINCS (L1000) as expression features"},
+{'name':'preprocess_rnaseq',
+ 'default':'none',
+ 'choices':['source_scale', 'combat', 'none'],
+ 'help':"preprocessing method for RNAseq data; none for global normalization"},
+{'name':'response_url',
+ 'default':None,
+ 'help':"URL to combo dose response file"},
+{'name':'residual',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':"add skip connections to the layers"},
+{'name':'reduce_lr',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':'reduce learning rate on plateau'},
+{'name':'warmup_lr',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':'gradually increase learning rate on start'},
+{'name':'base_lr', 'type':float,
+ 'default':None,
+ 'help':'base learning rate'},
+{'name':'cp',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':'checkpoint models with best val_loss'},
+{'name':'tb',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':'use tensorboard'},
+{'name':'max_val_loss', 'type':float,
+ 'help':'retrain if val_loss is greater than the threshold'},
+{'name':'cv_partition',
+ 'choices':['overlapping', 'disjoint', 'disjoint_cells'],
+ 'help':"cross validation paritioning scheme: overlapping or disjoint"},
+{'name':'cv', 'type':int,
+ 'help':"cross validation folds"},
+{'name':'gen',
+ 'type':candle.str2bool,
+ 'default':True, #action="store_true",
+ 'help':"use generator for training and validation data"},
+{'name':'exclude_cells', 'nargs':'+',
+ 'default':[],
+ 'help':"cell line IDs to exclude"},
+{'name':'exclude_drugs', 'nargs':'+',
+ 'default':[],
+ 'help':"drug line IDs to exclude"}
+]
+
+
+required = [ 'activation', 'batch_size', 'dense', 'dense_feature_layers', 'drop',
     'epochs', 'learning_rate', 'loss', 'optimizer', 'residual', 'rng_seed',
-    'save', 'scaling', 'feature_subsample', 'validation_split'],
-    ['solr_root', 'timeout']]
-
-    file_params = {}
-    for i, sec_args in enumerate(args):
-        for arg in sec_args:
-            file_params[arg] = eval(config.get(section[i], arg))
-
-    # parse the remaining values
-    for k, v in config.items(section[0]):
-        if not k in file_params:
-            file_params[k] = eval(v)
+    'save', 'scaling', 'feature_subsample', 'validation_split',
+    'solr_root', 'timeout'
+    ]
+
+class BenchmarkCombo(candle.Benchmark):
+    def set_locals(self):
+        """Functionality to set variables specific for the benchmark
+        - required: set of required parameters for the benchmark.
+        - additional_definitions: list of dictionaries describing the additional parameters for the
+        benchmark.
+        """
+
+        if required is not None:
+            self.required = set(required)
+        if additional_definitions is not None:
+            self.additional_definitions = additional_definitions

-    return file_params

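The declarative additional_definitions and required lists replace the hand-written argparse and ConfigParser code; the candle.Benchmark subclass hands them to the CANDLE framework when the final parameter set is built. As a rough illustration of the pattern only (this is not the candle_keras implementation, and str2bool below is a simplified stand-in for candle.str2bool), each definition dict maps onto one add_argument call:

import argparse

def str2bool(v):
    # simplified stand-in for candle.str2bool
    return str(v).lower() in ('yes', 'true', 't', '1')

definitions = [
    {'name': 'use_landmark_genes', 'type': str2bool, 'default': True,
     'help': 'use the 978 landmark genes from LINCS (L1000) as expression features'},
    {'name': 'preprocess_rnaseq', 'default': 'none',
     'choices': ['source_scale', 'combat', 'none'],
     'help': 'preprocessing method for RNAseq data; none for global normalization'},
]

def build_parser(defs):
    parser = argparse.ArgumentParser()
    for d in defs:
        d = dict(d)                      # copy so the shared definition is not mutated
        flag = '--' + d.pop('name')      # 'name' becomes the command-line flag
        parser.add_argument(flag, **d)   # remaining keys pass straight to argparse
    return parser

args = build_parser(definitions).parse_args(['--preprocess_rnaseq', 'combat'])
print(args.use_landmark_genes, args.preprocess_rnaseq)   # True combat
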
Pilot1/Combo/combo_baseline_keras2.py

Lines changed: 13 additions & 29 deletions
@@ -30,23 +30,15 @@
 mpl.use('Agg')
 import matplotlib.pyplot as plt

-
 import combo
-import p1_common
-# import p1_common_keras
-from solr_keras import CandleRemoteMonitor, compute_trainable_params, TerminateOnTimeOut
-
-# import argparser
-# from datasets import NCI60

 import NCI60
 import combo
-
+import candle_keras as candle

 logger = logging.getLogger(__name__)
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

-
 def set_seed(seed):
     os.environ['PYTHONHASHSEED'] = '0'
     np.random.seed(seed)
@@ -647,26 +639,18 @@ def build_model(loader, args, verbose=False):

     return Model(inputs, output)

+def initialize_parameters():

+    # Build benchmark object
+    comboBmk = combo.BenchmarkCombo(combo.file_path, 'combo_default_model.txt', 'keras',
+                                    prog='combo_baseline',
+                                    desc = 'Build neural network based models to predict tumor response to drug pairs.')

-def get_combo_parser():
-    description = 'Build neural network based models to predict tumor response to drug pairs.'
-    parser = argparse.ArgumentParser(prog='combo_baseline', formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-                                     description=description)
-    return combo.common_parser(parser)
-
-
-def initialize_parameters():
-    # Get command-line parameters
-    parser = get_combo_parser()
-    args = parser.parse_args()
-    # Get parameters from configuration file
-    file_params = combo.read_config_file(args.config_file)
-    # Consolidate parameter set. Command-line parameters overwrite file configuration
-    params = p1_common.args_overwrite_config(args, file_params)
-    # print(params)
-    return params
+    # Initialize parameters
+    gParameters = candle.initialize_parameters(comboBmk)
+    #combo.logger.info('Params: {}'.format(gParameters))

+    return gParameters

 class Struct:
     def __init__(self, **entries):
@@ -740,10 +724,10 @@ def warmup_scheduler(epoch):
     model.compile(loss=args.loss, optimizer=optimizer, metrics=[mae, r2])

     # calculate trainable and non-trainable params
-    params.update(compute_trainable_params(model))
+    params.update(candle.compute_trainable_params(model))

-    candle_monitor = CandleRemoteMonitor(params=params)
-    timeout_monitor = TerminateOnTimeOut(params['timeout'])
+    candle_monitor = candle.CandleRemoteMonitor(params=params)
+    timeout_monitor = candle.TerminateOnTimeOut(params['timeout'])

     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001)
     warmup_lr = LearningRateScheduler(warmup_scheduler)

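initialize_parameters() now delegates all argument and config-file handling to candle.initialize_parameters via the BenchmarkCombo object. A hedged sketch of how such an entry point is typically driven; run() is an assumption here (it is not shown in this diff), while the rest mirrors the code above:

def main():
    params = initialize_parameters()   # merged config-file + command-line parameters
    run(params)                        # hypothetical training driver defined elsewhere in the script

if __name__ == '__main__':
    main()
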
Pilot1/Combo/infer.py

Lines changed: 1 addition & 1 deletion
@@ -81,7 +81,7 @@ def get_parser(description=None):
     parser.add_argument("--use_landmark_genes", action="store_true",
                         help="use the 978 landmark genes from LINCS (L1000) as expression features")
     parser.add_argument("--preprocess_rnaseq",
-                        choices=['scale_per_source', 'combat', 'none'],
+                        choices=['source_scale', 'combat', 'none'],
                         help="preprocessing method for RNAseq data; none for global normalization")

     return parser

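Only the choice string changes here: 'scale_per_source' becomes 'source_scale', matching the renamed option in combo.py and NCI60.py. A standalone argparse illustration of the effect (not the full infer.py parser):

import argparse

p = argparse.ArgumentParser()
p.add_argument("--preprocess_rnaseq",
               choices=['source_scale', 'combat', 'none'],
               help="preprocessing method for RNAseq data; none for global normalization")

print(p.parse_args(['--preprocess_rnaseq', 'source_scale']).preprocess_rnaseq)  # source_scale
# the old value 'scale_per_source' would now fail with an "invalid choice" error
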
Pilot1/Combo/infer_dose.py

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ def get_parser(description=None):
     parser.add_argument("--use_landmark_genes", action="store_true",
                         help="use the 978 landmark genes from LINCS (L1000) as expression features")
     parser.add_argument("--preprocess_rnaseq",
-                        choices=['scale_per_source', 'combat', 'none'],
+                        choices=['source_scale', 'combat', 'none'],
                         help="preprocessing method for RNAseq data; none for global normalization")
     parser.add_argument("--skip_single_prediction_cleanup", action="store_true",
                         help="skip removing single drug predictions with two different concentrations")
