From 5c5e41beb2cf82ae9649f59f182d28b89aa9d74e Mon Sep 17 00:00:00 2001 From: Jiya873 Date: Tue, 20 May 2025 10:45:49 +0530 Subject: [PATCH 1/2] Fix cnv_discordant_read_calls: raise ValueError when no data found --- malariagen_data/anoph/cnv_data.py | 12 +++++++++--- malariagen_data/util.py | 1 - 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/malariagen_data/anoph/cnv_data.py b/malariagen_data/anoph/cnv_data.py index 23eeef33b..fbffeea11 100644 --- a/malariagen_data/anoph/cnv_data.py +++ b/malariagen_data/anoph/cnv_data.py @@ -610,13 +610,19 @@ def cnv_discordant_read_calls( ly.append(y) - if len(ly) == 0: - # Bail out, no data for given sample sets and analysis. - raise ValueError("No data found for requested sample sets.") + # Check after processing all sample sets for a given contig. + if not ly: + # Bail out, no data for given sample sets and analysis. + raise ValueError("No data found for requested sample sets.") x = simple_xarray_concat(ly, dim=DIM_SAMPLE) lx.append(x) + # Optionally, check if no contigs yielded data. + if not lx: + raise ValueError("No data found for requested sample sets across all contigs.") + + ds = simple_xarray_concat(lx, dim=DIM_VARIANT) debug("handle sample query") diff --git a/malariagen_data/util.py b/malariagen_data/util.py index 09e8fbe91..cb73e5bbb 100644 --- a/malariagen_data/util.py +++ b/malariagen_data/util.py @@ -488,7 +488,6 @@ def init_filesystem(url, **kwargs): # Process the URL using fsspec. fs, path = url_to_fs(url, **storage_options) - # Path compatibility, fsspec/gcsfs behaviour varies between versions. while path.endswith("/"): path = path[:-1] From 28d592c2d0be6ddafcb079ca048099157ea8fcbd Mon Sep 17 00:00:00 2001 From: Jiya873 Date: Wed, 21 May 2025 08:57:17 +0530 Subject: [PATCH 2/2] Add tests to cover ValueError in cnv_discordant_read_calls --- tests/anoph/test_cnv_data.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tests/anoph/test_cnv_data.py b/tests/anoph/test_cnv_data.py index 54c1ddf12..0b4920cdd 100644 --- a/tests/anoph/test_cnv_data.py +++ b/tests/anoph/test_cnv_data.py @@ -181,6 +181,21 @@ def test_open_cnv_discordant_read_calls(fixture, api: AnophelesCnvData): root = api.open_cnv_discordant_read_calls(sample_set="foobar") +def fake_none_dataset(*args, **kwargs): + return None + +def test_cnv_discordant_read_calls_no_data(monkeypatch): + # Create an instance of AnophelesCnvData; assuming you have a fixture or a way to instantiate. + api = AnophelesCnvData(...) + + # Monkey-patch the dataset method to always return None. + monkeypatch.setattr(api, "_cnv_discordant_read_calls_dataset", fake_none_dataset) + + # Expect a ValueError since no dataset will be found. + with pytest.raises(ValueError, match="No data found for requested sample sets"): + # Pass some valid contig string (e.g., "2RL") and sample_sets + api.cnv_discordant_read_calls(contig="2RL", sample_sets=["some_sample_set"]) + def test_cnv_hmm__sample_query(ag3_sim_fixture, ag3_sim_api: AnophelesCnvData): api = ag3_sim_api fixture = ag3_sim_fixture