From f1cbe331f56051b641b0a4b22410fd04f2eb4323 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 3 Jun 2025 16:59:51 -0700 Subject: [PATCH 01/12] fix: check if edge weights are between 0 and 1 Closes #237. --- spras/dataset.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/spras/dataset.py b/spras/dataset.py index cf79a3b15..caa47a8b6 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -89,6 +89,9 @@ def load_files_from_dict(self, dataset_dict): "Direction", ] + if not pd.Series(self.interactome.Weight).between(0, 1).all(): + raise ValueError("A member of the Weight column is not between 0 and 1.") + # Make directionality column case-insensitive self.interactome["Direction"] = self.interactome["Direction"].str.upper() if not self.interactome["Direction"].isin(["U", "D"]).all(): From f89e7332ea2f38a7bae8677ff625d2c9e4edf5dd Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Wed, 4 Jun 2025 08:44:52 -0700 Subject: [PATCH 02/12] fix: better err message on invalid ranges --- spras/dataset.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/spras/dataset.py b/spras/dataset.py index caa47a8b6..6e9914bc1 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -89,8 +89,12 @@ def load_files_from_dict(self, dataset_dict): "Direction", ] - if not pd.Series(self.interactome.Weight).between(0, 1).all(): - raise ValueError("A member of the Weight column is not between 0 and 1.") + weight_series = pd.Series(self.interactome.Weight) + weight_series_out_range = ~weight_series.between(0, 1) + if weight_series_out_range.any(): + # Offset the index by 1 to match the file line numbers. + weight_true = (weight_series_out_range.index[weight_series_out_range == True] + 1).tolist() + raise ValueError(f"The following lines of the interactome: ({str(weight_true)}) have weights not between 0 and 1.") # Make directionality column case-insensitive self.interactome["Direction"] = self.interactome["Direction"].str.upper() From 8e4016b57dd57e67e20ed65da9677edbb1ffe148 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 09:30:18 -0700 Subject: [PATCH 03/12] test: dataset --- spras/dataset.py | 14 ++++++++-- test/dataset/fixtures/empty/network.txt | 0 test/dataset/fixtures/empty/nodes.txt | 0 test/dataset/fixtures/empty/scores.txt | 0 test/dataset/fixtures/in-range/network.txt | 2 ++ .../dataset/fixtures/in-range/node-prizes.txt | 3 ++ test/dataset/fixtures/in-range/sources.txt | 1 + test/dataset/fixtures/in-range/targets.txt | 1 + test/dataset/test_dataset.py | 28 +++++++++++++++++++ 9 files changed, 46 insertions(+), 3 deletions(-) create mode 100644 test/dataset/fixtures/empty/network.txt create mode 100644 test/dataset/fixtures/empty/nodes.txt create mode 100644 test/dataset/fixtures/empty/scores.txt create mode 100644 test/dataset/fixtures/in-range/network.txt create mode 100644 test/dataset/fixtures/in-range/node-prizes.txt create mode 100644 test/dataset/fixtures/in-range/sources.txt create mode 100644 test/dataset/fixtures/in-range/targets.txt create mode 100644 test/dataset/test_dataset.py diff --git a/spras/dataset.py b/spras/dataset.py index 6e9914bc1..7f68d31b6 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -1,5 +1,6 @@ import os import pickle as pkl +from typing import TypedDict import warnings import pandas as pd @@ -11,13 +12,20 @@ Methods and intermediate state for loading data and putting it into pandas tables for use by pathway reconstruction algorithms. """ +class DatasetDict(TypedDict): + label: str + node_files: list[str | os.PathLike] + edge_files: list[str | os.PathLike] + other_files: list[str | os.PathLike] + data_dir: str | os.PathLike + class Dataset: NODE_ID = "NODEID" warning_threshold = 0.05 # Threshold for scarcity of columns to warn user - def __init__(self, dataset_dict): + def __init__(self, dataset_dict: DatasetDict): self.label = None self.interactome = None self.node_table = None @@ -43,7 +51,7 @@ def from_file(cls, file_name): with open(file_name, "rb") as f: return pkl.load(f) - def load_files_from_dict(self, dataset_dict): + def load_files_from_dict(self, dataset_dict: DatasetDict): """ Loads data files from dataset_dict, which is one dataset dictionary from the list in the config file with the fields in the config file. @@ -120,7 +128,7 @@ def load_files_from_dict(self, dataset_dict): os.path.join(data_loc, node_file), header=None ) single_node_table.columns = [self.NODE_ID] - new_col_name = node_file.split(".")[0] + new_col_name = str(node_file).split(".")[0] single_node_table[new_col_name] = True # Use only keys from the existing node table so that nodes that are not in the interactome are ignored diff --git a/test/dataset/fixtures/empty/network.txt b/test/dataset/fixtures/empty/network.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/empty/nodes.txt b/test/dataset/fixtures/empty/nodes.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/empty/scores.txt b/test/dataset/fixtures/empty/scores.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/in-range/network.txt b/test/dataset/fixtures/in-range/network.txt new file mode 100644 index 000000000..01116bd07 --- /dev/null +++ b/test/dataset/fixtures/in-range/network.txt @@ -0,0 +1,2 @@ +A B 2.98 U +B C 0.77 U diff --git a/test/dataset/fixtures/in-range/node-prizes.txt b/test/dataset/fixtures/in-range/node-prizes.txt new file mode 100644 index 000000000..d03c30492 --- /dev/null +++ b/test/dataset/fixtures/in-range/node-prizes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy +A 2 true true +C 5.7 true diff --git a/test/dataset/fixtures/in-range/sources.txt b/test/dataset/fixtures/in-range/sources.txt new file mode 100644 index 000000000..f70f10e4d --- /dev/null +++ b/test/dataset/fixtures/in-range/sources.txt @@ -0,0 +1 @@ +A diff --git a/test/dataset/fixtures/in-range/targets.txt b/test/dataset/fixtures/in-range/targets.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/dataset/fixtures/in-range/targets.txt @@ -0,0 +1 @@ +C diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py new file mode 100644 index 000000000..da210c03b --- /dev/null +++ b/test/dataset/test_dataset.py @@ -0,0 +1,28 @@ +import pandas +from pathlib import Path +import pytest + +from spras.dataset import Dataset + +FIXTURES_PATH = Path('test', 'dataset', 'fixtures') + +class TestDataset: + def test_not_allow_no_cols(self): + with pytest.raises(pandas.errors.EmptyDataError): + Dataset({ + 'label': 'empty', + 'edge_files': ['network.txt'], + 'node_files': ['scores.txt', 'nodes.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'empty' + }) + + def test_not_allow_edge_weights_oor(self): + with pytest.raises(ValueError): + Dataset({ + 'label': 'empty', + 'edge_files': ['network.txt'], + 'node_files': ['node-prizes.txt', 'sources.txt', 'targets.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'in-range' + }) \ No newline at end of file From f275e57b239720ee1884bfc18817175751acea27 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 09:33:10 -0700 Subject: [PATCH 04/12] test: one for some normal dataset --- test/dataset/fixtures/in-range/network.txt | 4 ++-- test/dataset/fixtures/not-in-range/network.txt | 2 ++ .../dataset/fixtures/not-in-range/node-prizes.txt | 3 +++ test/dataset/fixtures/not-in-range/sources.txt | 1 + test/dataset/fixtures/not-in-range/targets.txt | 1 + test/dataset/test_dataset.py | 15 +++++++++++++-- 6 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 test/dataset/fixtures/not-in-range/network.txt create mode 100644 test/dataset/fixtures/not-in-range/node-prizes.txt create mode 100644 test/dataset/fixtures/not-in-range/sources.txt create mode 100644 test/dataset/fixtures/not-in-range/targets.txt diff --git a/test/dataset/fixtures/in-range/network.txt b/test/dataset/fixtures/in-range/network.txt index 01116bd07..5dd49410b 100644 --- a/test/dataset/fixtures/in-range/network.txt +++ b/test/dataset/fixtures/in-range/network.txt @@ -1,2 +1,2 @@ -A B 2.98 U -B C 0.77 U +A B 1 U +B C 0.5 U diff --git a/test/dataset/fixtures/not-in-range/network.txt b/test/dataset/fixtures/not-in-range/network.txt new file mode 100644 index 000000000..01116bd07 --- /dev/null +++ b/test/dataset/fixtures/not-in-range/network.txt @@ -0,0 +1,2 @@ +A B 2.98 U +B C 0.77 U diff --git a/test/dataset/fixtures/not-in-range/node-prizes.txt b/test/dataset/fixtures/not-in-range/node-prizes.txt new file mode 100644 index 000000000..d03c30492 --- /dev/null +++ b/test/dataset/fixtures/not-in-range/node-prizes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy +A 2 true true +C 5.7 true diff --git a/test/dataset/fixtures/not-in-range/sources.txt b/test/dataset/fixtures/not-in-range/sources.txt new file mode 100644 index 000000000..f70f10e4d --- /dev/null +++ b/test/dataset/fixtures/not-in-range/sources.txt @@ -0,0 +1 @@ +A diff --git a/test/dataset/fixtures/not-in-range/targets.txt b/test/dataset/fixtures/not-in-range/targets.txt new file mode 100644 index 000000000..3cc58df83 --- /dev/null +++ b/test/dataset/fixtures/not-in-range/targets.txt @@ -0,0 +1 @@ +C diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index da210c03b..667897cc9 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -24,5 +24,16 @@ def test_not_allow_edge_weights_oor(self): 'edge_files': ['network.txt'], 'node_files': ['node-prizes.txt', 'sources.txt', 'targets.txt'], 'other_files': [], - 'data_dir': FIXTURES_PATH / 'in-range' - }) \ No newline at end of file + 'data_dir': FIXTURES_PATH / 'not-in-range' + }) + + def test_normal(self): + dataset = Dataset({ + 'label': 'empty', + 'edge_files': ['network.txt'], + 'node_files': ['node-prizes.txt', 'sources.txt', 'targets.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'in-range' + }) + + assert len(dataset.get_interactome()) == 2 \ No newline at end of file From 04632f1d2a9e04743f909fad0af6ef2d847a8e58 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 13 Jun 2025 16:33:30 +0000 Subject: [PATCH 05/12] style: fmt --- spras/dataset.py | 2 +- test/dataset/test_dataset.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spras/dataset.py b/spras/dataset.py index 7f68d31b6..861e44948 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -1,7 +1,7 @@ import os import pickle as pkl -from typing import TypedDict import warnings +from typing import TypedDict import pandas as pd diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 667897cc9..c9705dc5a 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -1,5 +1,6 @@ -import pandas from pathlib import Path + +import pandas import pytest from spras.dataset import Dataset @@ -16,7 +17,7 @@ def test_not_allow_no_cols(self): 'other_files': [], 'data_dir': FIXTURES_PATH / 'empty' }) - + def test_not_allow_edge_weights_oor(self): with pytest.raises(ValueError): Dataset({ @@ -36,4 +37,4 @@ def test_normal(self): 'data_dir': FIXTURES_PATH / 'in-range' }) - assert len(dataset.get_interactome()) == 2 \ No newline at end of file + assert len(dataset.get_interactome()) == 2 From e5ad211b605810eddee041426e5edf942f44c336 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 24 Jun 2025 16:38:15 +0000 Subject: [PATCH 06/12] docs: on datasetdict --- spras/dataset.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/spras/dataset.py b/spras/dataset.py index 861e44948..8588ce4bc 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -13,13 +13,16 @@ """ class DatasetDict(TypedDict): + """ + Type class containing a collection of information pertaining to creating a Dataset + object. This layout is replicated directly in SPRAS configuration files. + """ label: str node_files: list[str | os.PathLike] edge_files: list[str | os.PathLike] other_files: list[str | os.PathLike] data_dir: str | os.PathLike - class Dataset: NODE_ID = "NODEID" From e9ad2fd0e0fd7ab26fdebfeca3882b07c5bbc171 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 24 Jun 2025 16:52:40 +0000 Subject: [PATCH 07/12] chore: update naming for empty ds --- test/dataset/fixtures/empty/{nodes.txt => node-prizes.txt} | 0 test/dataset/fixtures/empty/{scores.txt => sources.txt} | 0 test/dataset/test_dataset.py | 2 +- 3 files changed, 1 insertion(+), 1 deletion(-) rename test/dataset/fixtures/empty/{nodes.txt => node-prizes.txt} (100%) rename test/dataset/fixtures/empty/{scores.txt => sources.txt} (100%) diff --git a/test/dataset/fixtures/empty/nodes.txt b/test/dataset/fixtures/empty/node-prizes.txt similarity index 100% rename from test/dataset/fixtures/empty/nodes.txt rename to test/dataset/fixtures/empty/node-prizes.txt diff --git a/test/dataset/fixtures/empty/scores.txt b/test/dataset/fixtures/empty/sources.txt similarity index 100% rename from test/dataset/fixtures/empty/scores.txt rename to test/dataset/fixtures/empty/sources.txt diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index c9705dc5a..beef71ebe 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -13,7 +13,7 @@ def test_not_allow_no_cols(self): Dataset({ 'label': 'empty', 'edge_files': ['network.txt'], - 'node_files': ['scores.txt', 'nodes.txt'], + 'node_files': ['sources.txt', 'node-prizes.txt'], 'other_files': [], 'data_dir': FIXTURES_PATH / 'empty' }) From 54be58e2d48b86a66e57232c3eee7d1a87b06a7e Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 24 Jun 2025 16:55:58 +0000 Subject: [PATCH 08/12] test: empty sources, targets, node-prizes, or network --- test/dataset/fixtures/dataless/network.txt | 2 ++ .../dataset/fixtures/dataless/node-prizes.txt | 1 + test/dataset/fixtures/dataless/sources.txt | 0 test/dataset/fixtures/dataless/targets.txt | 0 .../fixtures/empty-network/network.txt | 0 .../fixtures/empty-network/node-prizes.txt | 3 +++ .../fixtures/empty-network/sources.txt | 1 + .../fixtures/empty-network/targets.txt | 1 + test/dataset/test_dataset.py | 20 +++++++++++++++++++ 9 files changed, 28 insertions(+) create mode 100644 test/dataset/fixtures/dataless/network.txt create mode 100644 test/dataset/fixtures/dataless/node-prizes.txt create mode 100644 test/dataset/fixtures/dataless/sources.txt create mode 100644 test/dataset/fixtures/dataless/targets.txt create mode 100644 test/dataset/fixtures/empty-network/network.txt create mode 100644 test/dataset/fixtures/empty-network/node-prizes.txt create mode 100644 test/dataset/fixtures/empty-network/sources.txt create mode 100644 test/dataset/fixtures/empty-network/targets.txt diff --git a/test/dataset/fixtures/dataless/network.txt b/test/dataset/fixtures/dataless/network.txt new file mode 100644 index 000000000..5dd49410b --- /dev/null +++ b/test/dataset/fixtures/dataless/network.txt @@ -0,0 +1,2 @@ +A B 1 U +B C 0.5 U diff --git a/test/dataset/fixtures/dataless/node-prizes.txt b/test/dataset/fixtures/dataless/node-prizes.txt new file mode 100644 index 000000000..26897b5a6 --- /dev/null +++ b/test/dataset/fixtures/dataless/node-prizes.txt @@ -0,0 +1 @@ +NODEID prize active dummy diff --git a/test/dataset/fixtures/dataless/sources.txt b/test/dataset/fixtures/dataless/sources.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/dataless/targets.txt b/test/dataset/fixtures/dataless/targets.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/empty-network/network.txt b/test/dataset/fixtures/empty-network/network.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/empty-network/node-prizes.txt b/test/dataset/fixtures/empty-network/node-prizes.txt new file mode 100644 index 000000000..d03c30492 --- /dev/null +++ b/test/dataset/fixtures/empty-network/node-prizes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy +A 2 true true +C 5.7 true diff --git a/test/dataset/fixtures/empty-network/sources.txt b/test/dataset/fixtures/empty-network/sources.txt new file mode 100644 index 000000000..8c7e5a667 --- /dev/null +++ b/test/dataset/fixtures/empty-network/sources.txt @@ -0,0 +1 @@ +A \ No newline at end of file diff --git a/test/dataset/fixtures/empty-network/targets.txt b/test/dataset/fixtures/empty-network/targets.txt new file mode 100644 index 000000000..7371f47a6 --- /dev/null +++ b/test/dataset/fixtures/empty-network/targets.txt @@ -0,0 +1 @@ +B \ No newline at end of file diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index beef71ebe..2fe2c0a15 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -17,6 +17,26 @@ def test_not_allow_no_cols(self): 'other_files': [], 'data_dir': FIXTURES_PATH / 'empty' }) + + def test_dataless(self): + with pytest.raises(pandas.errors.EmptyDataError): + Dataset({ + 'label': 'dataless', + 'edge_files': ['network.txt'], + 'node_files': ['sources.txt', 'node-prizes.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'dataless' + }) + + def test_empty_network(self): + with pytest.raises(pandas.errors.EmptyDataError): + Dataset({ + 'label': 'empty-network', + 'edge_files': ['network.txt'], + 'node_files': ['sources.txt', 'node-prizes.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'empty-network' + }) def test_not_allow_edge_weights_oor(self): with pytest.raises(ValueError): From a6788a31629ba774475db41c66e99140f3f9767d Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 24 Jun 2025 16:57:52 +0000 Subject: [PATCH 09/12] test: empty headers --- test/dataset/fixtures/empty-headers/network.txt | 2 ++ test/dataset/fixtures/empty-headers/node-prizes.txt | 1 + test/dataset/fixtures/empty-headers/sources.txt | 0 test/dataset/fixtures/empty-headers/targets.txt | 0 test/dataset/test_dataset.py | 10 ++++++++++ 5 files changed, 13 insertions(+) create mode 100644 test/dataset/fixtures/empty-headers/network.txt create mode 100644 test/dataset/fixtures/empty-headers/node-prizes.txt create mode 100644 test/dataset/fixtures/empty-headers/sources.txt create mode 100644 test/dataset/fixtures/empty-headers/targets.txt diff --git a/test/dataset/fixtures/empty-headers/network.txt b/test/dataset/fixtures/empty-headers/network.txt new file mode 100644 index 000000000..5dd49410b --- /dev/null +++ b/test/dataset/fixtures/empty-headers/network.txt @@ -0,0 +1,2 @@ +A B 1 U +B C 0.5 U diff --git a/test/dataset/fixtures/empty-headers/node-prizes.txt b/test/dataset/fixtures/empty-headers/node-prizes.txt new file mode 100644 index 000000000..26897b5a6 --- /dev/null +++ b/test/dataset/fixtures/empty-headers/node-prizes.txt @@ -0,0 +1 @@ +NODEID prize active dummy diff --git a/test/dataset/fixtures/empty-headers/sources.txt b/test/dataset/fixtures/empty-headers/sources.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/fixtures/empty-headers/targets.txt b/test/dataset/fixtures/empty-headers/targets.txt new file mode 100644 index 000000000..e69de29bb diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 2fe2c0a15..6e7b612d6 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -18,6 +18,16 @@ def test_not_allow_no_cols(self): 'data_dir': FIXTURES_PATH / 'empty' }) + def test_not_allow_no_cols_headers(self): + with pytest.raises(pandas.errors.EmptyDataError): + Dataset({ + 'label': 'empty-headers', + 'edge_files': ['network.txt'], + 'node_files': ['sources.txt', 'node-prizes.txt'], + 'other_files': [], + 'data_dir': FIXTURES_PATH / 'empty-headers' + }) + def test_dataless(self): with pytest.raises(pandas.errors.EmptyDataError): Dataset({ From 79f168282cefe3590953290fbd5bd23696022783 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Tue, 24 Jun 2025 17:16:21 +0000 Subject: [PATCH 10/12] style: fmt --- test/dataset/test_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 6e7b612d6..d01d16506 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -17,7 +17,7 @@ def test_not_allow_no_cols(self): 'other_files': [], 'data_dir': FIXTURES_PATH / 'empty' }) - + def test_not_allow_no_cols_headers(self): with pytest.raises(pandas.errors.EmptyDataError): Dataset({ @@ -27,7 +27,7 @@ def test_not_allow_no_cols_headers(self): 'other_files': [], 'data_dir': FIXTURES_PATH / 'empty-headers' }) - + def test_dataless(self): with pytest.raises(pandas.errors.EmptyDataError): Dataset({ @@ -37,7 +37,7 @@ def test_dataless(self): 'other_files': [], 'data_dir': FIXTURES_PATH / 'dataless' }) - + def test_empty_network(self): with pytest.raises(pandas.errors.EmptyDataError): Dataset({ From d0fc7e2dc14e59428b92cb651599857c76d77ae9 Mon Sep 17 00:00:00 2001 From: "Tristan F.-R." Date: Fri, 11 Jul 2025 20:17:15 +0000 Subject: [PATCH 11/12] refactor: drop unused dataset edge weight restriction --- spras/dataset.py | 7 ------- test/dataset/fixtures/not-in-range/network.txt | 2 -- test/dataset/fixtures/not-in-range/node-prizes.txt | 3 --- test/dataset/fixtures/not-in-range/sources.txt | 1 - test/dataset/fixtures/not-in-range/targets.txt | 1 - .../fixtures/{in-range => standard}/network.txt | 0 .../{in-range => standard}/node-prizes.txt | 0 .../fixtures/{in-range => standard}/sources.txt | 0 .../fixtures/{in-range => standard}/targets.txt | 0 test/dataset/test_dataset.py | 14 ++------------ 10 files changed, 2 insertions(+), 26 deletions(-) delete mode 100644 test/dataset/fixtures/not-in-range/network.txt delete mode 100644 test/dataset/fixtures/not-in-range/node-prizes.txt delete mode 100644 test/dataset/fixtures/not-in-range/sources.txt delete mode 100644 test/dataset/fixtures/not-in-range/targets.txt rename test/dataset/fixtures/{in-range => standard}/network.txt (100%) rename test/dataset/fixtures/{in-range => standard}/node-prizes.txt (100%) rename test/dataset/fixtures/{in-range => standard}/sources.txt (100%) rename test/dataset/fixtures/{in-range => standard}/targets.txt (100%) diff --git a/spras/dataset.py b/spras/dataset.py index 8588ce4bc..88f664460 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -100,13 +100,6 @@ def load_files_from_dict(self, dataset_dict: DatasetDict): "Direction", ] - weight_series = pd.Series(self.interactome.Weight) - weight_series_out_range = ~weight_series.between(0, 1) - if weight_series_out_range.any(): - # Offset the index by 1 to match the file line numbers. - weight_true = (weight_series_out_range.index[weight_series_out_range == True] + 1).tolist() - raise ValueError(f"The following lines of the interactome: ({str(weight_true)}) have weights not between 0 and 1.") - # Make directionality column case-insensitive self.interactome["Direction"] = self.interactome["Direction"].str.upper() if not self.interactome["Direction"].isin(["U", "D"]).all(): diff --git a/test/dataset/fixtures/not-in-range/network.txt b/test/dataset/fixtures/not-in-range/network.txt deleted file mode 100644 index 01116bd07..000000000 --- a/test/dataset/fixtures/not-in-range/network.txt +++ /dev/null @@ -1,2 +0,0 @@ -A B 2.98 U -B C 0.77 U diff --git a/test/dataset/fixtures/not-in-range/node-prizes.txt b/test/dataset/fixtures/not-in-range/node-prizes.txt deleted file mode 100644 index d03c30492..000000000 --- a/test/dataset/fixtures/not-in-range/node-prizes.txt +++ /dev/null @@ -1,3 +0,0 @@ -NODEID prize active dummy -A 2 true true -C 5.7 true diff --git a/test/dataset/fixtures/not-in-range/sources.txt b/test/dataset/fixtures/not-in-range/sources.txt deleted file mode 100644 index f70f10e4d..000000000 --- a/test/dataset/fixtures/not-in-range/sources.txt +++ /dev/null @@ -1 +0,0 @@ -A diff --git a/test/dataset/fixtures/not-in-range/targets.txt b/test/dataset/fixtures/not-in-range/targets.txt deleted file mode 100644 index 3cc58df83..000000000 --- a/test/dataset/fixtures/not-in-range/targets.txt +++ /dev/null @@ -1 +0,0 @@ -C diff --git a/test/dataset/fixtures/in-range/network.txt b/test/dataset/fixtures/standard/network.txt similarity index 100% rename from test/dataset/fixtures/in-range/network.txt rename to test/dataset/fixtures/standard/network.txt diff --git a/test/dataset/fixtures/in-range/node-prizes.txt b/test/dataset/fixtures/standard/node-prizes.txt similarity index 100% rename from test/dataset/fixtures/in-range/node-prizes.txt rename to test/dataset/fixtures/standard/node-prizes.txt diff --git a/test/dataset/fixtures/in-range/sources.txt b/test/dataset/fixtures/standard/sources.txt similarity index 100% rename from test/dataset/fixtures/in-range/sources.txt rename to test/dataset/fixtures/standard/sources.txt diff --git a/test/dataset/fixtures/in-range/targets.txt b/test/dataset/fixtures/standard/targets.txt similarity index 100% rename from test/dataset/fixtures/in-range/targets.txt rename to test/dataset/fixtures/standard/targets.txt diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index d01d16506..4cb988632 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -48,23 +48,13 @@ def test_empty_network(self): 'data_dir': FIXTURES_PATH / 'empty-network' }) - def test_not_allow_edge_weights_oor(self): - with pytest.raises(ValueError): - Dataset({ - 'label': 'empty', - 'edge_files': ['network.txt'], - 'node_files': ['node-prizes.txt', 'sources.txt', 'targets.txt'], - 'other_files': [], - 'data_dir': FIXTURES_PATH / 'not-in-range' - }) - - def test_normal(self): + def test_standard(self): dataset = Dataset({ 'label': 'empty', 'edge_files': ['network.txt'], 'node_files': ['node-prizes.txt', 'sources.txt', 'targets.txt'], 'other_files': [], - 'data_dir': FIXTURES_PATH / 'in-range' + 'data_dir': FIXTURES_PATH / 'standard' }) assert len(dataset.get_interactome()) == 2 From 74e3080e6752cdb316d0c5d460e584b9da081de0 Mon Sep 17 00:00:00 2001 From: "Tristan F." Date: Thu, 21 Aug 2025 19:55:58 +0000 Subject: [PATCH 12/12] fix: never parse index_col from node data --- spras/dataset.py | 2 +- .../fixtures/toy-372/input-interactome.txt | 2 ++ test/dataset/fixtures/toy-372/input-nodes.txt | 3 +++ test/dataset/test_dataset.py | 22 +++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/dataset/fixtures/toy-372/input-interactome.txt create mode 100644 test/dataset/fixtures/toy-372/input-nodes.txt diff --git a/spras/dataset.py b/spras/dataset.py index 891f4d6f9..c2271235c 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -121,7 +121,7 @@ def load_files_from_dict(self, dataset_dict: DatasetDict): # Load generic node tables self.node_table = pd.DataFrame(node_set, columns=[self.NODE_ID]) for node_file in node_data_files: - single_node_table = pd.read_table(os.path.join(data_loc, node_file)) + single_node_table = pd.read_table(os.path.join(data_loc, node_file), index_col=False) # If we have only 1 column, assume this is an indicator variable if len(single_node_table.columns) == 1: single_node_table = pd.read_table( diff --git a/test/dataset/fixtures/toy-372/input-interactome.txt b/test/dataset/fixtures/toy-372/input-interactome.txt new file mode 100644 index 000000000..f252ca4ca --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-interactome.txt @@ -0,0 +1,2 @@ +C D 0.77 U +N O 0.66 U \ No newline at end of file diff --git a/test/dataset/fixtures/toy-372/input-nodes.txt b/test/dataset/fixtures/toy-372/input-nodes.txt new file mode 100644 index 000000000..2efa6f320 --- /dev/null +++ b/test/dataset/fixtures/toy-372/input-nodes.txt @@ -0,0 +1,3 @@ +NODEID prize active dummy sources targets +N +C 5.7 True True diff --git a/test/dataset/test_dataset.py b/test/dataset/test_dataset.py index 4cb988632..52333ca91 100644 --- a/test/dataset/test_dataset.py +++ b/test/dataset/test_dataset.py @@ -2,6 +2,7 @@ import pandas import pytest +import numpy as np from spras.dataset import Dataset @@ -58,3 +59,24 @@ def test_standard(self): }) assert len(dataset.get_interactome()) == 2 + + # 372 is a PR, but for the relevant comment, see + # https://github.com/Reed-CompBio/spras/pull/372/files#r2291953612. + # Note that the input-nodes file has more tabs than the original fixture. + def test_372(self): + dataset = Dataset({ + 'label': 'toy-372', + 'edge_files': ['input-interactome.txt'], + 'node_files': ['input-nodes.txt'], + 'data_dir': FIXTURES_PATH / 'toy-372', + 'other_files': [] + }) + + node_table = dataset.node_table + assert node_table is not None + + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['prize'] == 5.7 + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['active'] == True + + assert np.isnan(node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['sources']) + assert node_table[node_table[Dataset.NODE_ID] == 'C'].iloc[0]['targets'] == True