Skip to content

Commit 464f12b

Browse files
committed
update plangen
1 parent 5b5e806 commit 464f12b

File tree

4 files changed

+103
-3
lines changed

4 files changed

+103
-3
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Using Uno for Challenge Problem
2+
3+
## Generate Plan file
4+
5+
From the [master dataframe generation process](https://github.com/ECP-CANDLE/topN_generator/blob/master/build_master.ipynb), the unique lists of cell lines and drugs are generated (top21_cell.txt and top21_drug.txt, respectively).
6+
7+
```
8+
$ python plangen.py --fs_names cell drug --fs_paths top21_cell.txt top21_drug.txt --fs_parts 4 1 --out_dir . --overwrite
9+
...
10+
plangen_cell694-p4_drug1492-p1.json JSON file written
11+
```
12+
13+
## Node specific dataframe generation
14+
15+
This part is already integrated into the Challenge Problem workflow, but for testing purposes you can run this command to generate a dataframe for node 1.1:
16+
17+
```
18+
$ python topN_to_uno.py --dataframe_from top21.parquet --plan plangen_cell694-p4_drug1492-p1.json --incremental --cell_feature_selection lincs1000 --node 1.1 --output topN_1.1_uno.h5
19+
```
20+
21+
## Running Uno with new dataframe
22+
23+
```
24+
# set CANDLE_DATA_DIR to point to the directory containing topN_1.1_uno.h5
25+
$ python uno_baseline_keras2.py --conf uno_auc_model.txt --use_exported_data topN_1.1_uno.h5
26+
```

Pilot1/Uno/planargs.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""
2+
plangen command line argument definitions
3+
"""
4+
5+
import argparse
6+
7+
# Values accepted by --partition_strategy; the first entry is the default.
# NOTE(review): plangen.py refers to a "leaveout" strategy, which is not
# listed here -- confirm the intended set of names before relying on it.
partitioning_strategies = ['windows', 'undefine1', 'undefined2']    # to be completed ?????????????


def _build_parser():
    """Build the plangen argument parser without parsing anything."""
    parser = argparse.ArgumentParser(
        description='feature-set partitioning'
    )

    parser.add_argument('--in_dir',
                        type=str,
                        help='Directory containing feature-set list files')

    parser.add_argument('--out_dir',
                        default='results',
                        type=str,
                        help='Directory to contain generated plan files')

    parser.add_argument('--json',
                        action='store_true',
                        help='Generate plan in JSON format')

    parser.add_argument('--overwrite',
                        action='store_true',
                        help='Accept non-empty out_dir, contents overwritten')

    parser.add_argument('--partition_strategy',
                        choices=partitioning_strategies,
                        default=partitioning_strategies[0],
                        help='Specify a feature-set partitioning strategy')

    # The following fs_* arguments are required, the number of values specified for each
    # must match, and at least two values are required for each.
    # The parser itself only enforces "required" and "one or more values";
    # the matching-count and minimum-count rules are not checked here.

    parser.add_argument('--fs_names',
                        required=True,
                        type=str,
                        nargs='+',
                        help='Specify a list of (arbitrary) feature-set names')

    parser.add_argument('--fs_paths',
                        required=True,
                        type=str,
                        nargs='+',
                        help='Specify a list of feature-set file paths')

    parser.add_argument('--fs_parts',
                        required=True,
                        type=int,
                        nargs='+',
                        help='Specify a list of partition counts')

    parser.add_argument('--verbose',
                        action='store_true',
                        help='Verbosity')

    parser.add_argument('--debug',
                        action='store_true',
                        help='Data structure dumps, etc')

    parser.add_argument('--test',
                        action='store_true',
                        help='Test plan navigation and entry retrieval')

    parser.add_argument('--maxdepth', type=int, default=0)
    parser.add_argument('--print_tree', action='store_true')
    return parser


def parse_arguments(argv=None):
    """Parse the plangen command line.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse falls back to sys.argv[1:], so existing
            callers that pass nothing behave exactly as before.

    Returns:
        argparse.Namespace with one attribute per option defined above.

    Raises:
        SystemExit: raised by argparse on invalid or missing arguments.
    """
    args = _build_parser().parse_args(argv)
    return args

Pilot1/Uno/plangen.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import os
55
import sqlite3
66
import sys
7+
import planargs
78
from abc import ABC, abstractmethod # abstract class support
89
from collections import OrderedDict, deque, namedtuple
910
from datetime import datetime
@@ -139,8 +140,8 @@ def validate_args(args):
139140
sys.exit("Terminating due to error")
140141

141142
# construct a partitioning object exporting a partition() function
142-
if args.partition_strategy == "leaveout":
143-
generator = LeaveoutSubsetGenerator()
143+
# if args.partition_strategy == "leaveout":
144+
generator = LeaveoutSubsetGenerator()
144145

145146
# return feature-set contents lists
146147
return generator, fs_content

Pilot1/Uno/topN_to_uno.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def build_dataframe(args):
242242
x_test_1.columns = [""] * len(x_test_1.columns)
243243

244244
# store
245-
store = pd.HDFStore(args.output, "w", complevel=9, complib="blosc:snappy")
245+
store = pd.HDFStore(args.output, "w")
246246
store.put("y_train", y_train, format="table")
247247
store.put("y_val", y_val, format="table")
248248
store.put("x_train_0", x_train_0, format="table")

0 commit comments

Comments
 (0)