diff --git a/spras/analysis/summary.py b/spras/analysis/summary.py index 2092200f..58123850 100644 --- a/spras/analysis/summary.py +++ b/spras/analysis/summary.py @@ -1,3 +1,4 @@ +import json from pathlib import Path from statistics import median from typing import Iterable @@ -99,8 +100,9 @@ def summarize_networks(file_paths: Iterable[Path], node_table: pd.DataFrame, alg # Algorithm parameters have format { algo : { hashcode : { parameter combos } } } param_combo = algo_params[algo][hashcode] del param_combo['_spras_run_name'] - # TODO: sort parameters to provide stable summary table output - cur_nw_info.append(param_combo) + # We use json.dumps to properly serialize enums as strings, + # and sort parameters to provide stable summary table output. + cur_nw_info.append(json.dumps(param_combo, sort_keys=True)) # Save the current network information to the network summary list nw_info.append(cur_nw_info) diff --git a/spras/omicsintegrator1.py b/spras/omicsintegrator1.py index d8f07ff8..916b7c45 100644 --- a/spras/omicsintegrator1.py +++ b/spras/omicsintegrator1.py @@ -51,11 +51,6 @@ class DummyMode(CaseInsensitiveEnum): file = 'file' "connect the dummy node to a specific list of nodes provided in a file" - # To make sure that DummyMode prints as `terminals`, etc.. in JSON dictionaries - # (since they use object representation internally.) - def __repr__(self) -> str: - return f"'{self.name}'" - class OmicsIntegrator1Params(BaseModel): dummy_mode: Optional[DummyMode] = None mu_squared: bool = False diff --git a/spras/omicsintegrator2.py b/spras/omicsintegrator2.py index b20f9c9b..b6c18efd 100644 --- a/spras/omicsintegrator2.py +++ b/spras/omicsintegrator2.py @@ -16,8 +16,11 @@ class DummyMode(CaseInsensitiveEnum): terminals = 'terminals' + "Connect to all terminals." others = 'others' + "Connect to all nodes except for terminals." all = 'all' + "Connect to all nodes in the interactome." class OmicsIntegrator2Params(BaseModel): w: float = 5 @@ -29,22 +32,17 @@ class OmicsIntegrator2Params(BaseModel): g: float = 3 "Gamma: multiplicative edge penalty from degree of endpoints" - noise: Optional[float] = None + noise: float = 0.1 "Standard Deviation of the gaussian noise added to edges in Noisy Edges Randomizations." - noisy_edges: Optional[int] = None + noisy_edges: int = 0 "An integer specifying how many times to add noise to the given edge values and re-run." - random_terminals: Optional[int] = None + random_terminals: int = 0 "An integer specifying how many times to apply your given prizes to random nodes in the interactome and re-run" - dummy_mode: Optional[DummyMode] = None - """ - Tells the program which nodes in the interactome to connect the dummy node to. (default: terminals) - "terminals" = connect to all terminals - "others" = connect to all nodes except for terminals - "all" = connect to all nodes in the interactome. - """ + dummy_mode: DummyMode = DummyMode.terminals + "Tells the program which nodes in the interactome to connect the dummy node to." seed: Optional[int] = None "The random seed to use for this run." @@ -106,7 +104,6 @@ def generate_inputs(data: Dataset, filename_map): edges_df.to_csv(filename_map['edges'], sep='\t', index=False, columns=['Interactor1', 'Interactor2', 'cost'], header=['protein1', 'protein2', 'cost']) - # TODO add reasonable default values @staticmethod def run(inputs, output_file, args=None, container_settings=None): if not container_settings: container_settings = ProcessedContainerSettings() @@ -195,7 +192,11 @@ def parse_output(raw_pathway_file, standardized_pathway_file, params): df = add_rank_column(df) df = reinsert_direction_col_undirected(df) df.columns = ['Node1', 'Node2', 'Rank', "Direction"] - else: # corrupted data + else: + # We get protein1, protein2, and cost if no edges were inside the solution (as networkx + # does not have any edges in the solution to loop over, and therefore never makes the column) + # and we get protein1, protein2 if no edges were present in the augmented forest at all: + # both of these outcomes should be treated as an empty network. df = pd.DataFrame(columns=['Node1', 'Node2', 'Rank', 'Direction']) df, has_duplicates = duplicate_edges(df) diff --git a/test/analysis/expected_output/expected_egfr_summary.txt b/test/analysis/expected_output/expected_egfr_summary.txt index a2d151a4..bdbaded9 100644 --- a/test/analysis/expected_output/expected_egfr_summary.txt +++ b/test/analysis/expected_output/expected_egfr_summary.txt @@ -1,10 +1,10 @@ Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in sources Nodes in targets Nodes in active Nodes in dummy Parameter combination -test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 {'module_threshold': 0.05, 'slice_threshold': 0.3} -test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10} -test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 10.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01} -test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 2.0, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01} -test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.1, 'b': 0.55, 'd': 10, 'mu': 0.008, 'noise': None, 'g': 0.001, 'r': 0.01} -test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None} -test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None} -test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 {'k': 10} -test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 {'k': 20} +test/analysis/input/egfr/tps-egfr-domino-params-V3X4RW7_pathway.txt 48 45 3 0.0398936170212766 5 2.0 16 3.882808476926124 27 0 27 27 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}" +test/analysis/input/egfr/tps-egfr-meo-params-GKEDDFZ_pathway.txt 1877 12845 1 0.007295700506524384 469 6.0 6 2.7973618474338107 621 1 620 621 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}" +test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-3THRXWW_pathway.txt 28 20 8 0.05291005291005291 4 1.0 5 1.306439393939394 28 1 27 28 1 "{""b"": 10.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}" +test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-5QH767V_pathway.txt 39 31 8 0.04183535762483131 6 1.0 5 1.5084498834498834 39 1 38 39 1 "{""b"": 2.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}" +test/analysis/input/egfr/tps-egfr-omicsintegrator1-params-ITO5EQS_pathway.txt 14 9 5 0.0989010989010989 4 1.0 2 1.1866666666666668 14 0 14 14 0 "{""b"": 0.55, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.008, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.01, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.1}" +test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-EHHWPMD_pathway.txt 593 591 2 0.0033669841848593955 32 1.0 30 6.72248989073389 531 1 530 531 1 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}" +test/analysis/input/egfr/tps-egfr-omicsintegrator2-params-IV3IPCJ_pathway.txt 704 702 2 0.002836867968446916 35 1.0 24 6.038766691954387 616 1 615 616 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}" +test/analysis/input/egfr/tps-egfr-pathlinker-params-7S4SLU6_pathway.txt 14 17 1 0.18681318681318682 6 2.0 7 2.857142857142857 6 1 5 6 1 "{""k"": 10}" +test/analysis/input/egfr/tps-egfr-pathlinker-params-TCEMRS7_pathway.txt 25 32 1 0.10666666666666667 8 2.0 7 3.486666666666667 11 1 10 11 1 "{""k"": 20}" diff --git a/test/analysis/expected_output/expected_example_summary.txt b/test/analysis/expected_output/expected_example_summary.txt index 78fe74d7..f05b1522 100644 --- a/test/analysis/expected_output/expected_example_summary.txt +++ b/test/analysis/expected_output/expected_example_summary.txt @@ -1,13 +1,13 @@ Name Number of nodes Number of edges Number of connected components Density Max degree Median degree Max diameter Average path length Nodes in prize Nodes in active Nodes in dummy Nodes in sources Nodes in targets Parameter combination test/analysis/input/example/data0-allpairs-params-BEH6YB2_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {} -test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'module_threshold': 0.05, 'slice_threshold': 0.3} -test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'max_path_length': 3, 'local_search': True, 'rand_restarts': 10} -test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'flow': 1, 'capacity': 1} -test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0} -test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 0.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0} -test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 5.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0} -test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'dummy_mode': 'file', 'mu_squared': False, 'exclude_terms': False, 'noisy_edges': 0, 'shuffled_prizes': 0, 'random_terminals': 0, 'seed': None, 'w': 5.0, 'b': 6.0, 'd': 10, 'mu': 0.0, 'noise': None, 'g': 0.001, 'r': 0.0} -test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 {'w': 5.0, 'b': 4.0, 'g': 0.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None} -test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'w': 5.0, 'b': 2.0, 'g': 3.0, 'noise': None, 'noisy_edges': None, 'random_terminals': None, 'dummy_mode': None, 'seed': None} -test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 200} -test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 {'k': 100} +test/analysis/input/example/data0-domino-params-V3X4RW7_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""module_threshold"": 0.05, ""slice_threshold"": 0.3}" +test/analysis/input/example/data0-meo-params-GKEDDFZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""local_search"": true, ""max_path_length"": 3, ""rand_restarts"": 10}" +test/analysis/input/example/data0-mincostflow-params-SZPZVU6_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""capacity"": 1, ""flow"": 1}" +test/analysis/input/example/data0-omicsintegrator1-params-E3LSEZQ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}" +test/analysis/input/example/data0-omicsintegrator1-params-NFIPHUX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 0.0}" +test/analysis/input/example/data0-omicsintegrator1-params-SU2S63Y_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 5.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}" +test/analysis/input/example/data0-omicsintegrator1-params-V26JBGX_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 6.0, ""d"": 10, ""dummy_mode"": ""file"", ""exclude_terms"": false, ""g"": 0.001, ""mu"": 0.0, ""mu_squared"": false, ""noise"": null, ""noisy_edges"": 0, ""r"": 0.0, ""random_terminals"": 0, ""seed"": null, ""shuffled_prizes"": 0, ""w"": 5.0}" +test/analysis/input/example/data0-omicsintegrator2-params-EHHWPMD_pathway.txt 0 0 0 0.0 0 0.0 0 0.0 0 0 0 0 0 "{""b"": 4.0, ""dummy_mode"": ""terminals"", ""g"": 0.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}" +test/analysis/input/example/data0-omicsintegrator2-params-IV3IPCJ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""b"": 2.0, ""dummy_mode"": ""terminals"", ""g"": 3.0, ""noise"": 0.1, ""noisy_edges"": 0, ""random_terminals"": 0, ""seed"": null, ""w"": 5.0}" +test/analysis/input/example/data0-pathlinker-params-6SWY7JS_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 200}" +test/analysis/input/example/data0-pathlinker-params-VQL7BDZ_pathway.txt 3 2 1 0.6666666666666666 2 1.0 2 1.3333333333333333 2 2 1 1 1 "{""k"": 100}"