|
4 | 4 | import sys
|
5 | 5 | import logging
|
6 | 6 | import argparse
|
7 |
| -try: |
8 |
| - import configparser |
9 |
| -except ImportError: |
10 |
| - import ConfigParser as configparser |
11 | 7 |
|
12 | 8 | import pandas as pd
|
13 | 9 | import numpy as np
|
14 | 10 |
|
15 | 11 | file_path = os.path.dirname(os.path.realpath(__file__))
|
16 |
| -lib_path = os.path.abspath(os.path.join(file_path, '..', 'common')) |
17 |
| -sys.path.append(lib_path) |
18 |
| -# lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common')) |
19 |
| -# sys.path.append(lib_path2) |
| 12 | +lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common')) |
| 13 | +sys.path.append(lib_path2) |
20 | 14 |
|
21 |
| -import p1_common |
| 15 | +import candle_keras as candle |
22 | 16 |
|
23 | 17 | logger = logging.getLogger(__name__)
|
24 | 18 |
|
25 |
| - |
26 |
| -def common_parser(parser): |
27 |
| - parser.add_argument("--config-file", dest='config_file', type=str, |
28 |
| - default=os.path.join(file_path, 'combo_default_model.txt'), |
29 |
| - help="specify model configuration file") |
30 |
| - |
31 |
| - # Parse has been split between arguments that are common with the default neon parser |
32 |
| - # and all the other options |
33 |
| - parser = p1_common.get_default_neon_parse(parser) |
34 |
| - parser = p1_common.get_p1_common_parser(parser) |
35 |
| - |
36 |
| - # Arguments that are applicable just to combo |
37 |
| - parser = combo_parser(parser) |
38 |
| - |
39 |
| - return parser |
40 |
| - |
41 |
| - |
42 |
| -def combo_parser(parser): |
43 |
| - parser.add_argument("--cell_features", nargs='+', |
44 |
| - default=argparse.SUPPRESS, |
45 |
| - choices=['expression', 'mirna', 'proteome', 'all', 'expression_5platform', 'expression_u133p2', 'rnaseq', 'categorical'], |
46 |
| - help="use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; use all for ['expression', 'mirna', 'proteome']; use 'categorical' for one-hot encoded cell lines") |
47 |
| - parser.add_argument("--drug_features", nargs='+', |
48 |
| - default=argparse.SUPPRESS, |
49 |
| - choices=['descriptors', 'latent', 'all', 'categorical', 'noise'], |
50 |
| - help="use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or one-hot encoded drugs, or random features; 'descriptors','latent', 'all', 'categorical', 'noise'") |
51 |
| - parser.add_argument('--dense_feature_layers', nargs='+', type=int, |
52 |
| - default=argparse.SUPPRESS, |
53 |
| - help='number of neurons in intermediate dense layers in the feature encoding submodels') |
54 |
| - parser.add_argument("--use_landmark_genes", action="store_true", |
55 |
| - help="use the 978 landmark genes from LINCS (L1000) as expression features") |
56 |
| - parser.add_argument("--preprocess_rnaseq", |
57 |
| - choices=['scale_per_source', 'combat', 'none'], |
58 |
| - help="preprocessing method for RNAseq data; none for global normalization") |
59 |
| - parser.add_argument("--response_url", |
60 |
| - help="URL to combo dose response file") |
61 |
| - parser.add_argument("--residual", action="store_true", |
62 |
| - help="add skip connections to the layers") |
63 |
| - parser.add_argument('--reduce_lr', action='store_true', |
64 |
| - help='reduce learning rate on plateau') |
65 |
| - parser.add_argument('--warmup_lr', action='store_true', |
66 |
| - help='gradually increase learning rate on start') |
67 |
| - parser.add_argument('--base_lr', type=float, |
68 |
| - default=None, |
69 |
| - help='base learning rate') |
70 |
| - parser.add_argument('--cp', action='store_true', |
71 |
| - help='checkpoint models with best val_loss') |
72 |
| - parser.add_argument('--tb', action='store_true', |
73 |
| - help='use tensorboard') |
74 |
| - parser.add_argument('--max_val_loss', type=float, |
75 |
| - default=argparse.SUPPRESS, |
76 |
| - help='retrain if val_loss is greater than the threshold') |
77 |
| - parser.add_argument("--cv_partition", |
78 |
| - choices=['overlapping', 'disjoint', 'disjoint_cells'], |
79 |
| - default=argparse.SUPPRESS, |
80 |
| - help="cross validation paritioning scheme: overlapping or disjoint") |
81 |
| - parser.add_argument("--cv", type=int, |
82 |
| - default=argparse.SUPPRESS, |
83 |
| - help="cross validation folds") |
84 |
| - parser.add_argument("--gen", action="store_true", |
85 |
| - help="use generator for training and validation data") |
86 |
| - parser.add_argument("--exclude_cells", nargs='+', |
87 |
| - default=[], |
88 |
| - help="cell line IDs to exclude") |
89 |
| - parser.add_argument("--exclude_drugs", nargs='+', |
90 |
| - default=[], |
91 |
| - help="drug line IDs to exclude") |
92 |
| - |
93 |
| - return parser |
94 |
| - |
95 |
| - |
96 |
| -def read_config_file(file): |
97 |
| - config = configparser.ConfigParser() |
98 |
| - config.read(file) |
99 |
| - section = config.sections() |
100 |
| - |
101 |
| - args = [['activation', 'batch_size', 'dense', 'dense_feature_layers', 'drop', |
| 19 | +additional_definitions = [ |
| 20 | +{'name':'cell_features', |
| 21 | + 'nargs':'+', |
| 22 | + 'choices':['expression', 'mirna', 'proteome', 'all', 'expression_5platform', 'expression_u133p2', 'rnaseq', 'categorical'], |
| 23 | + 'help':"use one or more cell line feature sets: 'expression', 'mirna', 'proteome', 'all'; use all for ['expression', 'mirna', 'proteome']; use 'categorical' for one-hot encoded cell lines"}, |
| 24 | +{'name':'drug_features', 'nargs':'+', |
| 25 | + 'choices':['descriptors', 'latent', 'all', 'categorical', 'noise'], |
| 26 | + 'help':"use dragon7 descriptors, latent representations from Aspuru-Guzik's SMILES autoencoder, or both, or one-hot encoded drugs, or random features; 'descriptors','latent', 'all', 'categorical', 'noise'"}, |
| 27 | +{'name':'dense_feature_layers', |
| 28 | + 'nargs':'+', |
| 29 | + 'type':int, |
| 30 | + 'help':'number of neurons in intermediate dense layers in the feature encoding submodels'}, |
| 31 | +{'name':'use_landmark_genes', |
| 32 | + 'type':candle.str2bool, |
| 33 | + 'default':True, #action="store_true", |
| 34 | + 'help':"use the 978 landmark genes from LINCS (L1000) as expression features"}, |
| 35 | +{'name':'preprocess_rnaseq', |
| 36 | + 'default':'none', |
| 37 | + 'choices':['source_scale', 'combat', 'none'], |
| 38 | + 'help':"preprocessing method for RNAseq data; none for global normalization"}, |
| 39 | +{'name':'response_url', |
| 40 | + 'default':None, |
| 41 | + 'help':"URL to combo dose response file"}, |
| 42 | +{'name':'residual', |
| 43 | + 'type':candle.str2bool, |
| 44 | + 'default':True, #action="store_true", |
| 45 | + 'help':"add skip connections to the layers"}, |
| 46 | +{'name':'reduce_lr', |
| 47 | + 'type':candle.str2bool, |
| 48 | + 'default':True, #action="store_true", |
| 49 | + 'help':'reduce learning rate on plateau'}, |
| 50 | +{'name':'warmup_lr', |
| 51 | + 'type':candle.str2bool, |
| 52 | + 'default':True, #action="store_true", |
| 53 | + 'help':'gradually increase learning rate on start'}, |
| 54 | +{'name':'base_lr', 'type':float, |
| 55 | + 'default':None, |
| 56 | + 'help':'base learning rate'}, |
| 57 | +{'name':'cp', |
| 58 | + 'type':candle.str2bool, |
| 59 | + 'default':True, #action="store_true", |
| 60 | + 'help':'checkpoint models with best val_loss'}, |
| 61 | +{'name':'tb', |
| 62 | + 'type':candle.str2bool, |
| 63 | + 'default':True, #action="store_true", |
| 64 | + 'help':'use tensorboard'}, |
| 65 | +{'name':'max_val_loss', 'type':float, |
| 66 | + 'help':'retrain if val_loss is greater than the threshold'}, |
| 67 | +{'name':'cv_partition', |
| 68 | + 'choices':['overlapping', 'disjoint', 'disjoint_cells'], |
| 69 | + 'help':"cross validation paritioning scheme: overlapping or disjoint"}, |
| 70 | +{'name':'cv', 'type':int, |
| 71 | + 'help':"cross validation folds"}, |
| 72 | +{'name':'gen', |
| 73 | + 'type':candle.str2bool, |
| 74 | + 'default':True, #action="store_true", |
| 75 | + 'help':"use generator for training and validation data"}, |
| 76 | +{'name':'exclude_cells', 'nargs':'+', |
| 77 | + 'default':[], |
| 78 | + 'help':"cell line IDs to exclude"}, |
| 79 | +{'name':'exclude_drugs', 'nargs':'+', |
| 80 | + 'default':[], |
| 81 | + 'help':"drug line IDs to exclude"} |
| 82 | +] |
| 83 | + |
| 84 | + |
| 85 | +required = [ 'activation', 'batch_size', 'dense', 'dense_feature_layers', 'drop', |
102 | 86 | 'epochs', 'learning_rate', 'loss', 'optimizer', 'residual', 'rng_seed',
|
103 |
| - 'save', 'scaling', 'feature_subsample', 'validation_split'], |
104 |
| - ['solr_root', 'timeout']] |
105 |
| - |
106 |
| - file_params = {} |
107 |
| - for i, sec_args in enumerate(args): |
108 |
| - for arg in sec_args: |
109 |
| - file_params[arg] = eval(config.get(section[i], arg)) |
110 |
| - |
111 |
| - # parse the remaining values |
112 |
| - for k, v in config.items(section[0]): |
113 |
| - if not k in file_params: |
114 |
| - file_params[k] = eval(v) |
| 87 | + 'save', 'scaling', 'feature_subsample', 'validation_split', |
| 88 | + 'solr_root', 'timeout' |
| 89 | + ] |
| 90 | + |
| 91 | +class BenchmarkCombo(candle.Benchmark): |
| 92 | + def set_locals(self): |
| 93 | + """Functionality to set variables specific for the benchmark |
| 94 | + - required: set of required parameters for the benchmark. |
| 95 | + - additional_definitions: list of dictionaries describing the additional parameters for the |
| 96 | + benchmark. |
| 97 | + """ |
| 98 | + |
| 99 | + if required is not None: |
| 100 | + self.required = set(required) |
| 101 | + if additional_definitions is not None: |
| 102 | + self.additional_definitions = additional_definitions |
115 | 103 |
|
116 |
| - return file_params |
0 commit comments