diff --git a/CHANGES.md b/CHANGES.md index 4680637..055d833 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -9,6 +9,10 @@ * Performance improvements and simplification to habitat processing. * Store more analysis data from model validation. +### Fixed + +* Fixed a bug in collate data where it would fail to process any files. + ## v1.0.1 (19/10/2025) ### Fixed diff --git a/aoh/validation/collate_data.py b/aoh/validation/collate_data.py index fcce4e4..e55ac00 100644 --- a/aoh/validation/collate_data.py +++ b/aoh/validation/collate_data.py @@ -29,9 +29,11 @@ def collate_data( aoh_results: Path, output_path: Path, ) -> None: - manifests = aoh_results.glob("**/*.json") - if len(list(manifests)) == 0: - sys.exit(f"Found no manifests in {aoh_results}") + # Casting to a list here is a bit wasteful, but I think detecting early + # that there are no files leads to better error reporting. + manifests = list(aoh_results.glob("**/*.json")) + if len(manifests) == 0: + raise FileNotFoundError(f"Found no manifests in {aoh_results}") os.makedirs(output_path.parent, exist_ok=True) @@ -73,10 +75,13 @@ def main() -> None: ) args = parser.parse_args() - collate_data( - args.aohs_path, - args.output_path, - ) + try: + collate_data( + args.aohs_path, + args.output_path, + ) + except FileNotFoundError: + sys.exit("Failed to find data") if __name__ == "__main__": main() diff --git a/tests/test_collate_data.py b/tests/test_collate_data.py new file mode 100644 index 0000000..c092d72 --- /dev/null +++ b/tests/test_collate_data.py @@ -0,0 +1,23 @@ +import tempfile +from pathlib import Path + +import pytest +import pandas as pd + +from aoh.validation.collate_data import collate_data + +def test_simple_positive_path() -> None: + with tempfile.TemporaryDirectory() as tmpdir: + src_data_path = Path("tests/testdata/collate_data") + input_file_count = len(list(src_data_path.glob("*.json"))) + + output_path = Path(tmpdir) / "res.csv" + collate_data(src_data_path, output_path) + + results = 
pd.read_csv(output_path) + assert len(results) == input_file_count + +def test_fails_on_empty_dir() -> None: + with tempfile.TemporaryDirectory() as tmpdir: + with pytest.raises(FileNotFoundError): + collate_data(Path(tmpdir), Path(tmpdir) / "output.csv") diff --git a/tests/testdata/collate_data/1.json b/tests/testdata/collate_data/1.json new file mode 100644 index 0000000..76eeda9 --- /dev/null +++ b/tests/testdata/collate_data/1.json @@ -0,0 +1,18 @@ +{ + "id_no": "1", + "assessment_id": "111", + "assessment_year": "2010", + "season": "RESIDENT", + "elevation_lower": "0", + "elevation_upper": "150", + "full_habitat_code": "3.5|14.3|4.4|3.7|3.6|15.8|4.7|14.6|4.5|14.2|3.4|4.6|14.5|14.1|15.7|15.9|14.4", + "scientific_name": "One une en", + "family_name": "NUMBER", + "class_name": "MAMMALIA", + "category": "LC", + "range_total": 5377499278409.75, + "hab_total": 3142935478203.522, + "dem_total": 5118755494259.25, + "aoh_total": 3026810920143.975, + "prevalence": 0.5628658905258045 +} diff --git a/tests/testdata/collate_data/2.json b/tests/testdata/collate_data/2.json new file mode 100644 index 0000000..c93c1d2 --- /dev/null +++ b/tests/testdata/collate_data/2.json @@ -0,0 +1,18 @@ +{ + "id_no": "2", + "assessment_id": "222", + "assessment_year": "2020", + "season": "RESIDENT", + "elevation_lower": "20", + "elevation_upper": "300", + "full_habitat_code": "3.4|14.3|4.4|1.4|6|3.8|1.3|3.2", + "scientific_name": "Two deux tvÄ", + "family_name": "NUMBER", + "class_name": "MAMMALIA", + "category": "EN", + "range_total": 133914029408.875, + "hab_total": 88190380681.45854, + "dem_total": 129205940713.25, + "aoh_total": 86741613961.4514, + "prevalence": 0.6477410495699915 +}