Commit 2a9e4d0

add locally connected layers
1 parent f701bab commit 2a9e4d0

2 files changed: +58 additions, -15 deletions


P1B3/p1b3.py

Lines changed: 14 additions & 9 deletions
@@ -55,7 +55,6 @@ def scale(df, scaling=None):
 
     mat = df.as_matrix()
    mat = scaler.fit_transform(mat)
-    # print(mat.shape)
     df = pd.DataFrame(mat, columns=df.columns)
 
     return df
@@ -81,13 +80,10 @@ def impute_and_scale(df, scaling='std'):
         return pd.DataFrame(mat, columns=df.columns)
 
     if scaling == 'maxabs':
-        # Normalizing -1 to 1
         scaler = MaxAbsScaler()
     elif scaling == 'minmax':
-        # Scaling to [0,1]
         scaler = MinMaxScaler()
     else:
-        # Standard normalization
         scaler = StandardScaler()
 
     mat = scaler.fit_transform(mat)
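For reference, the three scaling options map a feature column as follows. This is a minimal sketch using the same scikit-learn classes; the toy column values are made up, not benchmark data:

import numpy as np
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, StandardScaler

mat = np.array([[-2.0], [0.0], [4.0]])               # toy feature column
print(MaxAbsScaler().fit_transform(mat).ravel())     # [-0.5  0.   1. ]   -> within [-1, 1]
print(MinMaxScaler().fit_transform(mat).ravel())     # [0.  0.333  1.]    -> within [0, 1]
print(StandardScaler().fit_transform(mat).ravel())   # zero mean, unit variance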
@@ -291,8 +287,10 @@ def __init__(self, val_split=0.2, shuffle=True, drug_features='descriptors',
 
         self.df_cellline = load_cellline_expressions(cell_expr_path, ncols=feature_subsample, scaling=scaling)
 
-        df = load_dose_response(dose_resp_path, min_logconc=min_logconc, max_logconc=max_logconc, subsample='naive_balancing')
+        df = load_dose_response(dose_resp_path, min_logconc=min_logconc, max_logconc=max_logconc, subsample=subsample)
         logger.info('Loaded {} unique (D, CL) response sets.'.format(df.shape[0]))
+        # df[['GROWTH', 'LOG_CONCENTRATION']].to_csv('all.response.csv')
+
         df = df.reset_index()
         df = df.merge(self.df_cellline[['CELLNAME']], on='CELLNAME')
 
@@ -310,6 +308,7 @@ def __init__(self, val_split=0.2, shuffle=True, drug_features='descriptors',
         self.df_drug_rand = df_rand.reset_index()
 
         logger.debug('Filtered down to {} rows with matching information.'.format(df.shape[0]))
+        # df[['GROWTH', 'LOG_CONCENTRATION']].to_csv('filtered.response.csv')
 
         df_test_cell = pd.read_csv(test_cell_path)
         df_test_drug = pd.read_csv(test_drug_path, dtype={'NSC':object})
@@ -361,16 +360,21 @@ def __init__(self, val_split=0.2, shuffle=True, drug_features='descriptors',
         logger.info('Rows in train: {}, val: {}, test: {}'.format(self.n_train, self.n_val, self.n_test))
 
         self.input_dim = self.df_cellline.shape[1] - 1 + 1  # remove CELLNAME; add concentration
+        logger.info('Features:')
+        logger.info('  concentration: 1')
+        logger.info('  cell line expression: {}'.format(self.input_dim-1))
         if self.drug_features in ['descriptors', 'both']:
             self.input_dim += self.df_drug_desc.shape[1] - 1  # remove NSC
+            logger.info('  drug descriptors: {}'.format(self.df_drug_desc.shape[1] - 1))
         if self.drug_features in ['latent', 'both']:
             self.input_dim += self.df_drug_auen.shape[1] - 1  # remove NSC
+            logger.info('  drug latent representations: {}'.format(self.df_drug_auen.shape[1] - 1))
         if self.drug_features == 'noise':
             self.input_dim += self.df_drug_rand.shape[1] - 1  # remove NSC
+            logger.info('  drug random vectors: {}'.format(self.df_drug_rand.shape[1] - 1))
+        logger.info('Total input dimensions: {}'.format(self.input_dim))
 
-        logger.info('Input dim = {}'.format(self.input_dim))
-
-    def flow(self, batch_size=32, data='train', reshape=False):
+    def flow(self, batch_size=32, data='train', topology=None):
         if data == 'val':
             cyc = self.cycle_val
         elif data == 'test':
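To make the logged breakdown concrete: with hypothetical data of 900 expression columns (after dropping CELLNAME) and drug_features='descriptors' contributing 3800 descriptor columns (after dropping NSC), the total would be 1 (concentration) + 900 + 3800 = 4701 input dimensions.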
@@ -398,7 +402,8 @@ def flow(self, batch_size=32, data='train', reshape=False):
             x = np.array(df.iloc[:, 1:])
             y = np.array(df.iloc[:, 0])
             y = y / 100.
-            if reshape:
+
+            if topology == 'simple_local':
                 yield x.reshape(x.shape + (1,)), y
                 # yield x.reshape(x.shape[0], 1, x.shape[1]), y
             else:
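The 'simple_local' branch appends a channel axis so each batch becomes (samples, steps, channels), the 3D shape LocallyConnected1D expects. A minimal NumPy sketch with a hypothetical batch shape:

import numpy as np

x = np.zeros((32, 4000))           # hypothetical batch: 32 samples, 4000 features
x3d = x.reshape(x.shape + (1,))    # -> (32, 4000, 1): one channel per feature step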

P1B3/p1b3_baseline.py

Lines changed: 44 additions & 6 deletions
@@ -17,7 +17,7 @@
 from keras import backend as K
 from keras import metrics
 from keras.models import Sequential
-from keras.layers import Dense, Dropout
+from keras.layers import Dense, Dropout, LocallyConnected1D, MaxPooling1D, Flatten
 from keras.callbacks import Callback, ModelCheckpoint, ProgbarLogger
 
 from sklearn.preprocessing import Imputer
@@ -64,6 +64,13 @@
 D4 = 50
 DENSE_LAYERS = [D1, D2, D3, D4]
 
+# Number of units per locally connected layer
+LC1 = 10, 1  # nb_filter, filter_length
+LC2 = 0, 0   # disabled layer
+# LOCALLY_CONNECTED_LAYERS = list(LC1 + LC2)
+LOCALLY_CONNECTED_LAYERS = [0, 0]
+POOL = 100
+
 MIN_LOGCONC = -5.
 MAX_LOGCONC = -4.
 
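The flat LOCALLY_CONNECTED_LAYERS list is read two entries at a time as (nb_filter, filter_length) pairs, as the option parsing and model construction below do. A minimal decoding sketch with hypothetical values:

lc = [10, 10, 0, 0]  # hypothetical: one enabled layer, then a disabled one
pairs = [(lc[i], lc[i + 1]) for i in range(0, len(lc), 2)]
print(pairs)  # [(10, 10), (0, 0)] -- a non-positive entry disables that layer and all later ones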

@@ -90,6 +97,9 @@ def get_parser():
     parser.add_argument("-e", "--epochs", action="store",
                         default=NB_EPOCH, type=int,
                         help="number of training epochs")
+    parser.add_argument("-l", "--locally_connected", action="store", nargs='+', type=int,
+                        default=LOCALLY_CONNECTED_LAYERS,
+                        help="integer array describing locally connected layers: layer1_nb_filter, layer1_filter_len, layer2_nb_filter, layer2_filter_len, ...")
     parser.add_argument("-o", "--optimizer", action="store",
                         default=OPTIMIZER,
                         help="keras optimizer to use: sgd, rmsprop, ...")
@@ -99,6 +109,9 @@ def get_parser():
     parser.add_argument("--loss", action="store",
                         default=LOSS,
                         help="keras loss function to use: mse, ...")
+    parser.add_argument("--pool", action="store",
+                        default=POOL, type=int,
+                        help="pooling layer length")
     parser.add_argument("--scaling", action="store",
                         default=SCALING,
                         help="type of feature scaling; 'maxabs': to [-1,1]; 'minmax': to [0,1], 'std': standard unit normalization; None: no normalization")
@@ -147,6 +160,16 @@ def extension_from_parameters(args):
     ext += '.E={}'.format(args.epochs)
     if args.feature_subsample:
         ext += '.F={}'.format(args.feature_subsample)
+    if args.locally_connected:
+        layer_list = list(range(0, len(args.locally_connected), 2))
+        for l, i in enumerate(layer_list):
+            nb_filter = args.locally_connected[i]
+            filter_len = args.locally_connected[i+1]
+            if nb_filter <= 0 or filter_len <= 0:
+                break
+            ext += '.LC{}={},{}'.format(l+1, nb_filter, filter_len)
+        if args.pool and args.locally_connected[0] and args.locally_connected[1]:
+            ext += '.P={}'.format(args.pool)
     for i, n in enumerate(args.dense):
         if n:
             ext += '.D{}={}'.format(i+1, n)
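As a worked example of the run-name extension: hypothetical arguments locally_connected=[10, 10] with pool=10 append '.LC1=10,10.P=10', while the default [0, 0] hits the non-positive check on the first pair and appends nothing.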
@@ -289,13 +312,23 @@ def main():
                             subsample=args.subsample,
                             category_cutoffs=args.category_cutoffs)
 
-    train_gen = datagen.flow(batch_size=args.batch_size)
-    val_gen = datagen.flow(data='val', batch_size=args.batch_size)
-    val_gen2 = datagen.flow(data='val', batch_size=args.batch_size)
-    test_gen = datagen.flow(data='test', batch_size=args.batch_size)
-
+    topology = 'dense'
     out_dim = 1
+
     model = Sequential()
+    if args.locally_connected and args.locally_connected[0]:
+        topology = 'simple_local'
+        layer_list = list(range(0, len(args.locally_connected), 2))
+        for l, i in enumerate(layer_list):
+            nb_filter = args.locally_connected[i]
+            filter_len = args.locally_connected[i+1]
+            if nb_filter <= 0 or filter_len <= 0:
+                break
+            model.add(LocallyConnected1D(nb_filter, filter_len, input_shape=(datagen.input_dim, 1), activation=args.activation))
+        if args.pool:
+            model.add(MaxPooling1D(pool_length=args.pool))
+        model.add(Flatten())
+
     for layer in args.dense:
         if layer:
             model.add(Dense(layer, input_dim=datagen.input_dim, activation=args.activation))
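Putting the pieces together, the locally connected variant builds a stack like the following. This is a minimal standalone sketch in the same Keras 1.x API the file imports; the input dimension, filter counts, pool length, and dense width are hypothetical:

from keras.models import Sequential
from keras.layers import Dense, LocallyConnected1D, MaxPooling1D, Flatten

input_dim = 4000  # hypothetical total feature count

model = Sequential()
# 10 filters of length 10; unlike Convolution1D, weights are not shared across windows
model.add(LocallyConnected1D(10, 10, input_shape=(input_dim, 1), activation='relu'))
model.add(MaxPooling1D(pool_length=10))
model.add(Flatten())
model.add(Dense(1000, activation='relu'))
model.add(Dense(1))  # single regression output (growth)
model.compile(loss='mse', optimizer='sgd')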
@@ -306,6 +339,11 @@ def main():
     model.summary()
     model.compile(loss=args.loss, optimizer=args.optimizer)
 
+    train_gen = datagen.flow(batch_size=args.batch_size, topology=topology)
+    val_gen = datagen.flow(data='val', batch_size=args.batch_size, topology=topology)
+    val_gen2 = datagen.flow(data='val', batch_size=args.batch_size, topology=topology)
+    test_gen = datagen.flow(data='test', batch_size=args.batch_size, topology=topology)
+
     train_samples = int(datagen.n_train/args.batch_size) * args.batch_size
     val_samples = int(datagen.n_val/args.batch_size) * args.batch_size
     test_samples = int(datagen.n_test/args.batch_size) * args.batch_size
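Since training and evaluation draw whole batches from these generators, each sample count is rounded down to a multiple of the batch size: with a hypothetical n_train of 10000 and batch_size of 32, int(10000/32) * 32 = 9984 samples are consumed per epoch.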
