NOTE(review): line breaks and indentation in this patch were reconstructed from a
whitespace-collapsed copy; verify the context lines against the target files before
applying. The "index" hashes are carried over unchanged and may no longer describe
the edited hunks.

diff --git a/malariagen_data/anoph/cnv_data.py b/malariagen_data/anoph/cnv_data.py
index 23eeef33b..fbffeea11 100644
--- a/malariagen_data/anoph/cnv_data.py
+++ b/malariagen_data/anoph/cnv_data.py
@@ -610,13 +610,20 @@ def cnv_discordant_read_calls(
 
                 ly.append(y)
 
-                if len(ly) == 0:
-                    # Bail out, no data for given sample sets and analysis.
-                    raise ValueError("No data found for requested sample sets.")
+            # Check after processing all sample sets for a given contig.
+            if not ly:
+                # Bail out, no data for given sample sets and analysis.
+                raise ValueError("No data found for requested sample sets.")
 
             x = simple_xarray_concat(ly, dim=DIM_SAMPLE)
             lx.append(x)
 
+        # Bail out before concatenating if no contig contributed any data.
+        if not lx:
+            raise ValueError(
+                "No data found for requested sample sets across all contigs."
+            )
+
         ds = simple_xarray_concat(lx, dim=DIM_VARIANT)
 
         debug("handle sample query")
diff --git a/malariagen_data/util.py b/malariagen_data/util.py
index 09e8fbe91..cb73e5bbb 100644
--- a/malariagen_data/util.py
+++ b/malariagen_data/util.py
@@ -488,7 +488,6 @@ def init_filesystem(url, **kwargs):
     # Process the URL using fsspec.
     fs, path = url_to_fs(url, **storage_options)
 
-    # Path compatibility, fsspec/gcsfs behaviour varies between versions.
     while path.endswith("/"):
         path = path[:-1]
 
diff --git a/tests/anoph/test_cnv_data.py b/tests/anoph/test_cnv_data.py
index 54c1ddf12..0b4920cdd 100644
--- a/tests/anoph/test_cnv_data.py
+++ b/tests/anoph/test_cnv_data.py
@@ -181,6 +181,23 @@ def test_open_cnv_discordant_read_calls(fixture, api: AnophelesCnvData):
         root = api.open_cnv_discordant_read_calls(sample_set="foobar")
 
 
+def fake_none_dataset(*args, **kwargs):
+    # Stand-in for the per-sample-set dataset loader: accepts anything, finds nothing.
+    return None
+
+
+def test_cnv_discordant_read_calls_no_data(ag3_sim_api: AnophelesCnvData, monkeypatch):
+    api = ag3_sim_api
+
+    # Make every per-sample-set lookup report that no data are available.
+    monkeypatch.setattr(api, "_cnv_discordant_read_calls_dataset", fake_none_dataset)
+
+    # NOTE(review): sample_sets is omitted rather than passing a made-up name; this
+    # assumes the parameter is optional and that "2RL" is accepted here -- confirm.
+    with pytest.raises(ValueError, match="No data found for requested sample sets"):
+        api.cnv_discordant_read_calls(contig="2RL")
+
+
 def test_cnv_hmm__sample_query(ag3_sim_fixture, ag3_sim_api: AnophelesCnvData):
     api = ag3_sim_api
     fixture = ag3_sim_fixture