Skip to content

Commit 245f9bd

Browse files
author
Dave Lahr
committed
pandasGEXpress/parse_gct - modify to allow returning only row metadata or only col metadata to be more consistent with parse_gctx and allow consistent usage of parse
1 parent 7915d65 commit 245f9bd

File tree

4 files changed

+168
-140
lines changed

4 files changed

+168
-140
lines changed

cmapPy/pandasGEXpress/parse.py

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Generic parse method to parse either a .gct or a .gctx.
33
44
Takes in a file path corresponding to either a .gct or .gctx,
5-
and parses to a GCToo instance accordingly.
5+
and parses to a GCToo instance accordingly.
66
77
Note: Supports GCT1.2, GCT1.3, and GCTX1.0 files.
88
"""
@@ -20,43 +20,43 @@
2020

2121

2222
def parse(file_path, convert_neg_666=True, rid=None, cid=None, ridx=None, cidx=None,
23-
row_meta_only=False, col_meta_only=False, make_multiindex=False):
23+
row_meta_only=False, col_meta_only=False, make_multiindex=False):
2424
"""
25-
Identifies whether file_path corresponds to a .gct or .gctx file and calls the
26-
correct corresponding parse method.
25+
Identifies whether file_path corresponds to a .gct or .gctx file and calls the
26+
correct corresponding parse method.
2727
28-
Input:
29-
Mandatory:
30-
- gct(x)_file_path (str): full path to gct(x) file you want to parse.
31-
32-
Optional:
33-
- row_meta_only (bool): Whether to load data + metadata (if False), or just row metadata (if True)
34-
as pandas DataFrame
35-
- col_meta_only (bool): Whether to load data + metadata (if False), or just col metadata (if True)
36-
as pandas DataFrame
37-
- convert_neg_666 (bool): whether to convert -666 values to numpy.nan or not
38-
(see Note below for more details on this). Default = False.
39-
- rid (list of strings): list of row ids to specifically keep from gctx. Default=None.
40-
- cid (list of strings): list of col ids to specifically keep from gctx. Default=None.
41-
- make_multiindex (bool): whether to create a multi-index df combining
28+
Input:
29+
Mandatory:
30+
- file_path (str): full path to the gct(x) file you want to parse.
31+
32+
Optional:
33+
- row_meta_only (bool): Whether to load data + metadata (if False), or just row metadata (if True)
34+
as pandas DataFrame
35+
- col_meta_only (bool): Whether to load data + metadata (if False), or just col metadata (if True)
36+
as pandas DataFrame
37+
- convert_neg_666 (bool): whether to convert -666 values to numpy.nan or not
38+
(see Note below for more details on this). Default = True.
39+
- rid (list of strings): list of row ids to specifically keep from gctx. Default=None.
40+
- cid (list of strings): list of col ids to specifically keep from gctx. Default=None.
41+
- make_multiindex (bool): whether to create a multi-index df combining
4242
the 3 component dfs
4343
44-
Output:
45-
- myGCToo (GCToo)
44+
Output:
45+
- myGCToo (GCToo)
4646
47-
Note: why does convert_neg_666 exist?
48-
- In CMap--for somewhat obscure historical reasons--we use "-666" as our null value
49-
for metadata. However (so that users can take full advantage of pandas' methods,
50-
including those for filtering nan's etc) we provide the option of converting these
51-
into numpy.NaN values, the pandas default.
52-
"""
47+
Note: why does convert_neg_666 exist?
48+
- In CMap--for somewhat obscure historical reasons--we use "-666" as our null value
49+
for metadata. However (so that users can take full advantage of pandas' methods,
50+
including those for filtering nan's etc) we provide the option of converting these
51+
into numpy.nan values, the pandas default.
52+
"""
5353
if file_path.endswith(".gct"):
5454
# Ignoring arguments that won't be passed to parse_gct
55-
for unused_arg in ["rid", "cid", "cidx", "row_meta_only", "col_meta_only"]:
55+
for unused_arg in ["rid", "cid", "cidx"]:
5656
if eval(unused_arg):
5757
msg = "parse_gct does not use the argument {}. Ignoring it...".format(unused_arg)
58-
logger.info(msg)
59-
curr = parse_gct.parse(file_path, convert_neg_666, make_multiindex)
58+
logger.warning(msg)
59+
curr = parse_gct.parse(file_path, convert_neg_666, row_meta_only, col_meta_only, make_multiindex)
6060
elif file_path.endswith(".gctx"):
6161
curr = parse_gctx.parse(file_path, convert_neg_666, rid, cid, ridx, cidx, row_meta_only, col_meta_only,
6262
make_multiindex)

cmapPy/pandasGEXpress/parse_gct.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,13 +79,17 @@
7979
DATA_TYPE = np.float32
8080

8181

82-
def parse(file_path, convert_neg_666=True, make_multiindex=False):
82+
def parse(file_path, convert_neg_666=True, row_meta_only=False, col_meta_only=False, make_multiindex=False):
8383
""" Parse a .gct file into a GCToo object (or into only its row or col metadata DataFrame, if requested).
8484
8585
Args:
8686
- file_path (string): full path to gct(x) file you want to parse
8787
- convert_neg_666 (bool): whether to convert -666 values to numpy.nan
8888
(see Note below for more details). Default = True.
89+
- row_meta_only (bool): Whether to load data + metadata (if False), or just row metadata (if True)
90+
as pandas DataFrame
91+
- col_meta_only (bool): Whether to load data + metadata (if False), or just col metadata (if True)
92+
as pandas DataFrame
8993
- make_multiindex (bool): whether to create a multi-index df combining
9094
the 3 component dfs
9195
@@ -123,11 +127,16 @@ def parse(file_path, convert_neg_666=True, make_multiindex=False):
123127
file_path, num_data_rows, num_data_cols,
124128
num_row_metadata, num_col_metadata, nan_values)
125129

126-
# Create the gctoo object and assemble 3 component dataframes
127-
gctoo_obj = create_gctoo_obj(file_path, version,
128-
row_metadata, col_metadata, data, make_multiindex)
130+
if row_meta_only:
131+
return row_metadata
132+
elif col_meta_only:
133+
return col_metadata
134+
else:
135+
# Create the gctoo object and assemble 3 component dataframes
136+
gctoo_obj = create_gctoo_obj(file_path, version,
137+
row_metadata, col_metadata, data, make_multiindex)
129138

130-
return gctoo_obj
139+
return gctoo_obj
131140

132141

133142
def read_version_and_dims(file_path):

0 commit comments

Comments
 (0)