Skip to content

Commit cfbafe9

Browse files
committed
Fix bug in collate data and add tests to catch it
1 parent eb1b2ab commit cfbafe9

File tree

5 files changed

+75
-7
lines changed

5 files changed

+75
-7
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,10 @@
99
* Performance improvements and simplification to habitat processing.
1010
* Store more analysis data from model validation.
1111

12+
### Fixed
13+
14+
* Fixed a bug in collate data where it would fail to process any files.
15+
1216
## v1.0.1 (19/10/2025)
1317

1418
### Fixed

aoh/validation/collate_data.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,11 @@ def collate_data(
2929
aoh_results: Path,
3030
output_path: Path,
3131
) -> None:
32-
manifests = aoh_results.glob("**/*.json")
33-
if len(list(manifests)) == 0:
34-
sys.exit(f"Found no manifests in {aoh_results}")
32+
# Casting to a list here is a bit wasteful, but I think getting a sense
33+
# that there are no files early leads to better error reporting.
34+
manifests = list(aoh_results.glob("**/*.json"))
35+
if len(manifests) == 0:
36+
raise FileNotFoundError(f"Found no manifests in {aoh_results}")
3537

3638
os.makedirs(output_path.parent, exist_ok=True)
3739

@@ -73,10 +75,13 @@ def main() -> None:
7375
)
7476
args = parser.parse_args()
7577

76-
collate_data(
77-
args.aohs_path,
78-
args.output_path,
79-
)
78+
try:
79+
collate_data(
80+
args.aohs_path,
81+
args.output_path,
82+
)
83+
except FileNotFoundError:
84+
sys.exit("Failed to find data")
8085

8186
if __name__ == "__main__":
8287
main()

tests/test_collate_data.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import tempfile
2+
from pathlib import Path
3+
4+
import pytest
5+
import pandas as pd
6+
7+
from aoh.validation.collate_data import collate_data
8+
9+
def test_simple_positive_path() -> None:
10+
with tempfile.TemporaryDirectory() as tmpdir:
11+
src_data_path = Path("tests/testdata/collate_data")
12+
input_file_count = len(list(src_data_path.glob("*.json")))
13+
14+
output_path = Path(tmpdir) / "res.csv"
15+
collate_data(src_data_path, output_path)
16+
17+
results = pd.read_csv(output_path)
18+
assert len(results) == input_file_count
19+
20+
def test_fails_on_empty_dir() -> None:
21+
with tempfile.TemporaryDirectory() as tmpdir:
22+
with pytest.raises(FileNotFoundError):
23+
collate_data(Path(tmpdir), Path(tmpdir) / "output.csv")

tests/testdata/collate_data/1.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"id_no": "1",
3+
"assessment_id": "111",
4+
"assessment_year": "2010",
5+
"season": "RESIDENT",
6+
"elevation_lower": "0",
7+
"elevation_upper": "150",
8+
"full_habitat_code": "3.5|14.3|4.4|3.7|3.6|15.8|4.7|14.6|4.5|14.2|3.4|4.6|14.5|14.1|15.7|15.9|14.4",
9+
"scientific_name": "One une en",
10+
"family_name": "NUMBER",
11+
"class_name": "MAMMALIA",
12+
"category": "LC",
13+
"range_total": 5377499278409.75,
14+
"hab_total": 3142935478203.522,
15+
"dem_total": 5118755494259.25,
16+
"aoh_total": 3026810920143.975,
17+
"prevalence": 0.5628658905258045
18+
}

tests/testdata/collate_data/2.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
{
2+
"id_no": "2",
3+
"assessment_id": "222",
4+
"assessment_year": "2020",
5+
"season": "RESIDENT",
6+
"elevation_lower": "20",
7+
"elevation_upper": "300",
8+
"full_habitat_code": "3.4|14.3|4.4|1.4|6|3.8|1.3|3.2",
9+
"scientific_name": "Two deux två",
10+
"family_name": "NUMBER",
11+
"class_name": "MAMMALIA",
12+
"category": "EN",
13+
"range_total": 133914029408.875,
14+
"hab_total": 88190380681.45854,
15+
"dem_total": 129205940713.25,
16+
"aoh_total": 86741613961.4514,
17+
"prevalence": 0.6477410495699915
18+
}

0 commit comments

Comments
 (0)