Skip to content

Commit f7b6eaf

Browse files
authored
Merge pull request #460 from tristan-f-r/oi2-more-docs
chore(oi2): pin defaults, docs
2 parents 08946b1 + 8a7fca3 commit f7b6eaf

File tree

5 files changed

+37
-39
lines changed

5 files changed

+37
-39
lines changed

spras/analysis/summary.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
from pathlib import Path
23
from statistics import median
34
from typing import Iterable
@@ -99,8 +100,9 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg
99100
# Algorithm parameters have format { algo : { hashcode : { parameter combos } } }
100101
param_combo = algo_params[algo][hashcode]
101102
del param_combo['_spras_run_name']
102-
# TODO: sort parameters to provide stable summary table output
103-
cur_nw_info.append(param_combo)
103+
# We use json.dumps to properly serialize enums as strings,
104+
# and sort parameters to provide stable summary table output.
105+
cur_nw_info.append(json.dumps(param_combo, sort_keys=True))
104106

105107
# Save the current network information to the network summary list
106108
nw_info.append(cur_nw_info)

spras/omicsintegrator1.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,6 @@ class DummyMode(CaseInsensitiveEnum):
5151
file = 'file'
5252
"connect the dummy node to a specific list of nodes provided in a file"
5353

54-
# To make sure that DummyMode prints as `terminals`, etc. in JSON dictionaries
55-
# (since they use object representation internally.)
56-
def __repr__(self) -> str:
57-
return f"'{self.name}'"
58-
5954
class OmicsIntegrator1Params(BaseModel):
6055
dummy_mode: Optional[DummyMode] = None
6156
mu_squared: bool = False

spras/omicsintegrator2.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,11 @@
1616

1717
class DummyMode(CaseInsensitiveEnum):
1818
terminals = 'terminals'
19+
"Connect to all terminals."
1920
others = 'others'
21+
"Connect to all nodes except for terminals."
2022
all = 'all'
23+
"Connect to all nodes in the interactome."
2124

2225
class OmicsIntegrator2Params(BaseModel):
2326
w: float = 5
@@ -29,22 +32,17 @@ class OmicsIntegrator2Params(BaseModel):
2932
g: float = 3
3033
"Gamma: multiplicative edge penalty from degree of endpoints"
3134

32-
noise: Optional[float] = None
35+
noise: float = 0.1
3336
"Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations."
3437

35-
noisy_edges: Optional[int] = None
38+
noisy_edges: int = 0
3639
"An integer specifying how many times to add noise to the given edge values and re-run."
3740

38-
random_terminals: Optional[int] = None
41+
random_terminals: int = 0
3942
"An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run"
4043

41-
dummy_mode: Optional[DummyMode] = None
42-
"""
43-
Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals)
44-
"terminals" = connect to all terminals
45-
"others" = connect to all nodes except for terminals
46-
"all" = connect to all nodes in the interactome.
47-
"""
44+
dummy_mode: DummyMode = DummyMode.terminals
45+
"Tells the program which nodes in the interactome to connect the dummy node to."
4846

4947
seed: Optional[int] = None
5048
"The random seed to use for this run."
@@ -106,7 +104,6 @@ def generate_inputs(data: Dataset, filename_map):
106104
edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'],
107105
header=['protein1', 'protein2', 'cost'])
108106

109-
# TODO add reasonable default values
110107
@staticmethod
111108
def run(inputs, output_file, args=None, container_settings=None):
112109
if not container_settings: container_settings = ProcessedContainerSettings()
@@ -195,7 +192,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params):
195192
df = add_rank_column(df)
196193
df = reinsert_direction_col_undirected(df)
197194
df.columns = ['Node1', 'Node2', 'Rank', "Direction"]
198-
else: # corrupted data
195+
else:
196+
# We get the columns protein1, protein2, and cost if no edges were inside the solution (networkx
197+
# then has no edges in the solution to loop over, and therefore never makes the column),
198+
# and we get the columns protein1 and protein2 if no edges were present in the augmented forest at all.
199+
# Both of these outcomes should be treated as an empty network.
199200
df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction'])
200201

201202
df, has_duplicates = duplicate_edges(df)
Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in sources Nodes in targets Nodes in active Nodes in dummy Parameter combination
2-
test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 {'module_threshold': 0.05, 'slice_threshold': 0.3}
3-
test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10}
4-
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 10.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
5-
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 2.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
6-
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 0.55, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01}
7-
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
8-
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
9-
test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 {'k': 10}
10-
test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 {'k': 20}
2+
test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}"
3+
test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}"
4+
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 "{""b"": 10.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
5+
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 "{""b"": 2.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
6+
test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 "{""b"": 0.55, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}"
7+
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
8+
test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
9+
test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 "{""k"": 10}"
10+
test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 "{""k"": 20}"
Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in active Nodes in dummy Nodes in sources Nodes in targets Parameter combination
22
test/analysis/input/example/data0-allpairs-params-BEH6YB2_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {}
3-
test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'module_threshold': 0.05, 'slice_threshold': 0.3}
4-
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10}
5-
test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'flow': 1, 'capacity': 1}
6-
test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
7-
test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
8-
test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
9-
test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0}
10-
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
11-
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None}
12-
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 200}
13-
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 100}
3+
test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}"
4+
test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}"
5+
test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""capacity"": 1, ""flow"": 1}"
6+
test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}"
7+
test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}"
8+
test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}"
9+
test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}"
10+
test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
11+
test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}"
12+
test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 200}"
13+
test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 100}"

0 commit comments

Comments
 (0)