Skip to content

Commit c6c7bdb

Browse files
committed
New capability to show part of plan
1 parent 1d9c2c5 commit c6c7bdb

File tree

1 file changed

+82
-25
lines changed

1 file changed

+82
-25
lines changed

Pilot1/Uno/topN_to_uno.py

Lines changed: 82 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
import time
55
import json
66
from collections import OrderedDict
7-
import pandas as pd
8-
import numpy as np
97

108

119
def parse_arguments():
@@ -26,6 +24,7 @@ def parse_arguments():
2624
help='Plain text list for drug feature filtering. one item per line')
2725
parser.add_argument('--output', type=str, default='topN.uno.h5',
2826
help='output filename')
27+
parser.add_argument('--show', action='store_true', help='Simply show the plan node')
2928

3029
args, unparsed = parser.parse_known_args()
3130
return args, unparsed
@@ -36,11 +35,14 @@ def read_plan(filename, node):
3635
with open(filename, 'r') as plan_file:
3736
plan = json.load(plan_file)
3837
if node is None:
39-
return plan
40-
if node in plan:
41-
return plan[node]
38+
result = plan
39+
elif node in plan:
40+
result = plan[node]
4241
else:
4342
raise Exception('Node index "{}" was not found in plan file'.format(node))
43+
print("read_plan(): done.")
44+
return result
45+
4446

4547
class topN_NoDataException(Exception):
4648
pass
@@ -60,16 +62,16 @@ def build_masks(args, df):
6062

6163
for partition in ['train', 'val']:
6264
_mask = df['Sample'] == None # noqa Should keep == operator here. This is a pandas operation.
63-
for i in range(len(ids[partition]['cell'])):
64-
if 'cell' in ids[partition] and 'drug' in ids[partition]:
65-
cl_filter = ids[partition]['cell'][i]
66-
dr_filter = ids[partition]['drug'][i]
67-
__mask = df['Sample'].isin(cl_filter) & df['Drug1'].isin(dr_filter)
68-
elif 'cell' in ids[partition]:
69-
cl_filter = ids[partition]['cell'][i]
65+
for i in range(len(ids[partition]['CELL'])):
66+
if 'CELL' in ids[partition] and 'DRUG' in ids[partition]:
67+
cl_filter = ids[partition]['CELL'][i]
68+
dr_filter = ids[partition]['DRUG'][i]
69+
__mask = df['Sample'].isin(cl_filter) & df['DRUG1'].isin(dr_filter)
70+
elif 'CELL' in ids[partition]:
71+
cl_filter = ids[partition]['CELL'][i]
7072
__mask = df['Sample'].isin(cl_filter)
71-
elif 'drug' in ids[partition]:
72-
dr_filter = ids[partition]['drug'][i]
73+
elif 'DRUG' in ids[partition]:
74+
dr_filter = ids[partition]['DRUG'][i]
7375
__mask = df['Drug1'].isin(dr_filter)
7476
_mask = _mask | __mask
7577
mask[partition] = _mask
@@ -84,17 +86,18 @@ def build_masks_w_holdout(args, df):
8486

8587
print('from new build_mask: {} {} {}'.format(args.plan, args.node, args.incremental))
8688
import plangen
87-
plan = read_plan(args.plan, None)
89+
plan = read_plan(args.plan, args.node)
90+
8891
ids = {}
8992
mask = {}
90-
# Dicts {'cell': [[CCL_510, CCL_577, ...]]} :
93+
# Dicts {'CELL': [[CCL_510, CCL_577, ...]]} :
9194
_, _, ids['train'], ids['val'] = plangen.get_subplan_features(plan, args.node, args.incremental)
9295
if ids['train'] == None:
9396
print("topN: get_subplan_features() returned None!")
9497
raise topN_NoDataException()
9598

96-
print("cell lines in plan for %s: ids train len: " % args.node +
97-
str(len(ids['train']['cell'][0])))
99+
print("CELL lines in plan for %s: ids train len: " % args.node +
100+
str(len(ids['train']['CELL'][0])))
98101

99102
# holdout
100103
from sklearn.model_selection import ShuffleSplit
@@ -124,19 +127,19 @@ def build_masks_w_holdout(args, df):
124127

125128
for partition in ['train', 'val']:
126129
_mask = df['Sample'] == None # noqa Should keep == operator here. This is a pandas operation.
127-
for i in range(len(ids[partition]['cell'])):
130+
for i in range(len(ids[partition]['CELL'])):
128131
print("i: %i" % i)
129132

130-
if 'cell' in ids[partition] and 'drug' in ids[partition]:
133+
if 'CELL' in ids[partition] and 'drug' in ids[partition]:
131134
print("IF CD")
132-
cl_filter = ids[partition]['cell'][i]
135+
cl_filter = ids[partition]['CELL'][i]
133136
dr_filter = ids[partition]['drug'][i]
134137
__mask = df_new['Sample'].isin(cl_filter) & \
135138
df_new['Drug1'].isin(dr_filter)
136139

137-
elif 'cell' in ids[partition]:
140+
elif 'CELL' in ids[partition]:
138141
print("IF C.")
139-
cl_filter = ids[partition]['cell'][i]
142+
cl_filter = ids[partition]['CELL'][i]
140143
__mask = df_new['Sample'].isin(cl_filter)
141144
elif 'drug' in ids[partition]:
142145
print("IF D.")
@@ -148,11 +151,14 @@ def build_masks_w_holdout(args, df):
148151

149152

150153
def get_random_mask(df, train_fraction=0.8):
    """Return a random boolean mask with one entry per row of ``df``.

    Each entry is drawn independently and is True when a uniform sample
    from [0, 1) falls below ``train_fraction``.  The share of True entries
    is therefore only approximately ``train_fraction``, and the result
    changes from call to call unless the caller seeds ``numpy.random``.

    Args:
        df: Any sized object; only ``len(df)`` is used.
        train_fraction: Probability that a given entry is True.  Defaults
            to 0.8, the value that used to be hard-coded.

    Returns:
        numpy.ndarray of bool with ``len(df)`` entries.

    Raises:
        ValueError: if ``train_fraction`` is not within [0, 1].
    """
    # numpy is imported here rather than at module level.
    import numpy as np

    # The chained comparison also rejects NaN, which compares False.
    if not 0.0 <= train_fraction <= 1.0:
        raise ValueError(
            'train_fraction must be within [0, 1], got {!r}'.format(train_fraction))
    return np.random.rand(len(df)) < train_fraction
152156

153157

154158
def read_dataframe(args):
155159
print("in read_dataframe") ; sys.stdout.flush()
160+
import pandas as pd
161+
156162
_, ext = os.path.splitext(args.dataframe_from)
157163
if ext == '.h5' or ext == '.hdf5':
158164
print("HDFStore r " + str(args.dataframe_from))
@@ -198,6 +204,7 @@ def read_dataframe(args):
198204

199205
def build_dataframe(args):
200206
print("read_dataframe") ; sys.stdout.flush()
207+
import pandas as pd
201208
df_y, df_cl, df_dd = read_dataframe(args)
202209
print("read_dataframe OK") ; sys.stdout.flush()
203210

@@ -224,7 +231,7 @@ def build_dataframe(args):
224231
x_test_0 = df_cl.iloc[~df_cl.index.isin(tr_vl_idx), :].reset_index(drop=True)
225232
x_test_1 = df_dd.iloc[~df_dd.index.isin(tr_vl_idx), :].reset_index(drop=True)
226233
x_test_1.columns = [''] * len(x_val_1.columns)
227-
else:
234+
else: # args.fold is None
228235
# train_mask, val_mask = build_masks(args, df_y)
229236
train_mask, val_mask, test_mask = build_masks_w_holdout(args, df_y)
230237
print(str(train_mask))
@@ -278,6 +285,56 @@ def build_dataframe(args):
278285
store.close()
279286

280287

288+
def print_line(line):
    """Print the strings of ``line`` on one indented, space-separated line.

    line: list of str

    An empty ``line`` produces no output at all (not even a newline).
    """
    if len(line) > 0:
        # Emit the indent first, then the joined entries and the newline.
        print(" ", end="")
        print(" ".join(line))
296+
297+
298+
def show_list(L, limit=70):
    """Show list entries in indented lines of at most ``limit`` characters,
    ending on a blank line.

    Entries are converted with ``str()`` and separated by single spaces.
    The width budget covers the entries and the separators between them;
    the indent added by ``print_line()`` is not counted.  An entry that is
    longer than ``limit`` on its own is printed alone on its line rather
    than being split.

    Args:
        L: Iterable of entries to display.
        limit: Maximum width of the joined entries on one line
            (default 70).
    """
    line = []
    # Rendered width of `line`: entry lengths plus the separators between them.
    width = 0

    for entry in L:
        s = str(entry)
        # A separator is only needed when the line already holds an entry,
        # so every line, including the first, gets the same budget.
        needed = len(s) + (1 if line else 0)
        if line and width + needed > limit:
            print_line(line)
            line = []
            width = 0
            needed = len(s)
        line.append(s)
        width += needed

    # Flush the last, possibly partial, line (a no-op when it is empty).
    print_line(line)
    print("")
321+
322+
323+
def show(args):
    """Print the plan entry selected by ``args.node`` and its cell lists.

    The entry is read with ``read_plan(args.plan, args.node)`` and printed
    whole.  Then, for the 'val' and the 'train' partition in that order, the
    'cell' list of the partition's first element is reported: first its
    length, then its entries via ``show_list()``.
    """
    plan_dict = read_plan(args.plan, args.node)
    print(plan_dict)

    # (partition key, header format) pairs, handled in this order.
    sections = (
        ('val', "val cells: count: %i"),
        ('train', "train cells: count: %i"),
    )
    for partition, header in sections:
        cells = plan_dict[partition][0]['cell']
        print(header % len(cells))
        show_list(cells)
333+
334+
281335
if __name__ == '__main__':
    parsed, unparsed = parse_arguments()
    # --show only prints the selected plan node.  The dataframe build must
    # stay inside the else branch: running it unconditionally first would
    # defeat the purpose of --show.
    if parsed.show:
        show(parsed)
    else:
        build_dataframe(parsed)

0 commit comments

Comments
 (0)