Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
* Performance improvements and simplification to habitat processing.
* Store more analysis data from model validation.

### Fixed

* Fixed a bug in collate data where it would fail to process any files.

## v1.0.1 (19/10/2025)

### Fixed
Expand Down
19 changes: 12 additions & 7 deletions aoh/validation/collate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ def collate_data(
aoh_results: Path,
output_path: Path,
) -> None:
manifests = aoh_results.glob("**/*.json")
if len(list(manifests)) == 0:
sys.exit(f"Found no manifests in {aoh_results}")
# Casting to a list here is a bit wasteful, but I think getting a sense
# that there are no files early leads to better error reporting.
manifests = list(aoh_results.glob("**/*.json"))
if len(manifests) == 0:
raise FileNotFoundError(f"Found no manifests in {aoh_results}")

os.makedirs(output_path.parent, exist_ok=True)

Expand Down Expand Up @@ -73,10 +75,13 @@ def main() -> None:
)
args = parser.parse_args()

collate_data(
args.aohs_path,
args.output_path,
)
try:
collate_data(
args.aohs_path,
args.output_path,
)
except FileNotFoundError:
sys.exit("Failed to find data")

if __name__ == "__main__":
main()
23 changes: 23 additions & 0 deletions tests/test_collate_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import tempfile
from pathlib import Path

import pytest
import pandas as pd

from aoh.validation.collate_data import collate_data

def test_simple_positive_path() -> None:
    """Collating the bundled manifests yields one CSV row per input JSON file."""
    # Anchor the fixture directory to this test module rather than the
    # current working directory, so the test behaves the same regardless of
    # where pytest is invoked from.
    src_data_path = Path(__file__).resolve().parent / "testdata" / "collate_data"
    input_file_count = len(list(src_data_path.glob("*.json")))
    # Fail with a clear message if the fixtures are missing, so the failure
    # points at the test data rather than at collate_data itself.
    assert input_file_count > 0, f"No JSON fixtures found in {src_data_path}"

    with tempfile.TemporaryDirectory() as tmpdir:
        output_path = Path(tmpdir) / "res.csv"
        collate_data(src_data_path, output_path)

        results = pd.read_csv(output_path)
        assert len(results) == input_file_count

def test_fails_on_empty_dir() -> None:
    """An input directory with no JSON manifests raises FileNotFoundError."""
    with tempfile.TemporaryDirectory() as tmpdir:
        empty_dir = Path(tmpdir)
        csv_target = empty_dir / "output.csv"
        with pytest.raises(FileNotFoundError):
            collate_data(empty_dir, csv_target)
18 changes: 18 additions & 0 deletions tests/testdata/collate_data/1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"id_no": "1",
"assessment_id": "111",
"assessment_year": "2010",
"season": "RESIDENT",
"elevation_lower": "0",
"elevation_upper": "150",
"full_habitat_code": "3.5|14.3|4.4|3.7|3.6|15.8|4.7|14.6|4.5|14.2|3.4|4.6|14.5|14.1|15.7|15.9|14.4",
"scientific_name": "One une en",
"family_name": "NUMBER",
"class_name": "MAMMALIA",
"category": "LC",
"range_total": 5377499278409.75,
"hab_total": 3142935478203.522,
"dem_total": 5118755494259.25,
"aoh_total": 3026810920143.975,
"prevalence": 0.5628658905258045
}
18 changes: 18 additions & 0 deletions tests/testdata/collate_data/2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"id_no": "2",
"assessment_id": "222",
"assessment_year": "2020",
"season": "RESIDENT",
"elevation_lower": "20",
"elevation_upper": "300",
"full_habitat_code": "3.4|14.3|4.4|1.4|6|3.8|1.3|3.2",
"scientific_name": "Two deux två",
"family_name": "NUMBER",
"class_name": "MAMMALIA",
"category": "EN",
"range_total": 133914029408.875,
"hab_total": 88190380681.45854,
"dem_total": 129205940713.25,
"aoh_total": 86741613961.4514,
"prevalence": 0.6477410495699915
}