Skip to content

Commit b24b1a5

Browse files
authored
Merge pull request #50 from quantifyearth/mwd-occurrence-updates
Mwd occurrence updates
2 parents 65693c9 + 57c1718 commit b24b1a5

File tree

6 files changed

+85
-38
lines changed

6 files changed

+85
-38
lines changed

CHANGES.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44

55
* Change the output filename format to match those used in other IUCN processes.
66

7+
### Fixed
8+
9+
* Occurrence validation only handled range polygons in WGS84, now fixed.
10+
711
## v2.0.5 (18/02/2026)
812

913
### Added

aoh/validation/validate_occurences.py

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
import geopandas as gpd
1212
import pandas as pd
1313
import yirgacheffe as yg
14-
from pyproj import Transformer
1514
from shapely.geometry import Point
1615

1716
class Record(NamedTuple):
@@ -39,7 +38,7 @@ def process_species(
3938
raise ValueError("Too many taxon IDs")
4039
taxon_id = taxon_ids[0]
4140

42-
aoh_files = list(aohs_path.glob(f"**/{taxon_id}_*.tif"))
41+
aoh_files = list(aohs_path.glob(f"**/aoh_T{taxon_id}A*_*.tif"))
4342
# We here are aborting on those species with no data or those
4443
# with multiple seasons
4544
if len(aoh_files) == 0:
@@ -52,31 +51,32 @@ def process_species(
5251
with open(aoh_data_path, 'r', encoding='utf-8') as f:
5352
aoh_data = json.load(f)
5453

55-
species_data_files = list(species_data_path.glob(f"**/{taxon_id}_*.geojson"))
54+
species_data_files = list(species_data_path.glob(f"**/range_T{taxon_id}A*_*.geojson"))
5655
if len(species_data_files) != 1:
5756
raise RuntimeError(
5857
f"We expected one GeoJSON file beside the GeoTIFF, we found {len(species_data_files)} for {taxon_id}"
5958
)
6059
species_range = gpd.read_file(species_data_files[0])
60+
with yg.read_raster(aoh_files[0]) as aoh:
6161

62-
# From Dahal et al: "This ensured that we only included points which fell inside
63-
# the boundaries of the selected range maps."
64-
points_gdf = gpd.GeoDataFrame(
65-
species_occurrences,
66-
geometry=[
67-
Point(lon, lat)
68-
for lon, lat in
69-
zip(species_occurrences['decimalLongitude'], species_occurrences['decimalLatitude'])
70-
],
71-
crs='EPSG:4326',
72-
)
73-
clipped_points = gpd.sjoin(points_gdf, species_range, predicate='within', how='inner')
62+
# From Dahal et al: "This ensured that we only included points which fell inside
63+
# the boundaries of the selected range maps."
64+
wgs84_points_gdf = gpd.GeoDataFrame(
65+
species_occurrences,
66+
geometry=[
67+
Point(lon, lat)
68+
for lon, lat in
69+
zip(species_occurrences['decimalLongitude'], species_occurrences['decimalLatitude'])
70+
],
71+
crs='EPSG:4326',
72+
)
7473

75-
pixel_set = set()
76-
with yg.read_raster(aoh_files[0]) as aoh:
7774
# The GBIF data is in WGS84, and so we need to map that to a point in the
7875
# AOH raster projection space
79-
transformer = Transformer.from_crs("EPSG:4326", aoh.map_projection.name)
76+
points_gdf = wgs84_points_gdf.to_crs(aoh.map_projection.name)
77+
clipped_points = gpd.sjoin(points_gdf, species_range, predicate='within', how='inner')
78+
79+
pixel_set = set()
8080

8181
results = []
8282
for _, row in clipped_points.iterrows():
@@ -85,7 +85,8 @@ def process_species(
8585
# each species point locality was determined using bilinear interpolation of the four
8686
# nearest cells."
8787

88-
x, y = transformer.transform(row.decimalLongitude, row.decimalLatitude)
88+
# These have been converted to the right projection already
89+
x, y = row.geometry.x, row.geometry.y
8990
aligned_x = (x - aoh.area.left) / aoh.map_projection.xstep
9091
aligned_y = (y - aoh.area.top) / aoh.map_projection.ystep
9192
floored_aligned_x = math.floor(aligned_x)
@@ -137,7 +138,6 @@ def process_species(
137138
point_prevalence = None
138139
is_outlier = None
139140

140-
141141
return Record(
142142
taxon_id, # Species Redlist ID
143143
len(species_occurrences), # Raw number of occurrences from GBIF

tests/test_aoh_binary.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,6 @@ def test_no_elevation_aoh(force_habitat) -> None:
519519
assert manifest["full_habitat_code"] == "1.1"
520520

521521
# The default IUCN behaviour is to revert to range if no elevation
522-
print(manifest)
523522
assert manifest["range_total"] == 60
524523
assert manifest["dem_total"] == 0
525524
assert manifest["hab_total"] == 20

tests/test_habitat_process.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,6 @@ def test_rescale_make_single_map() -> None:
9090

9191
with yg.read_raster(habitat_path) as original:
9292
with yg.read_raster(expected_result_path) as result:
93-
print(result.window)
94-
print(result.area)
9593
assert result.window.xsize == original.window.xsize / 2
9694
assert result.window.ysize == original.window.ysize / 2
9795
original_data = original.read_array(0, 0, original.window.xsize, original.window.ysize)

tests/test_occurences.py

Lines changed: 61 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def generate_occurrence_cluster(
2525
res.append((latitude + y, longitude + x))
2626
return res
2727

28-
def geojson_of_shaps(shapes):
28+
def geojson_of_shapes(shapes, projection: str = "epsg:4326"):
2929
features = []
3030
for geom in shapes:
3131
feature = {
@@ -37,11 +37,18 @@ def geojson_of_shaps(shapes):
3737

3838
geojson = {
3939
"type": "FeatureCollection",
40+
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:" + ("::".join(projection.split(":"))) } },
4041
"features": features
4142
}
4243
return geojson
4344

44-
def generate_faux_aoh(filename: Path, aoh_radius:float=5.0, range_radius:float=10.0) -> None:
45+
def generate_faux_aoh(
46+
filename: Path,
47+
aoh_radius: float=5.0,
48+
range_radius: float=10.0,
49+
projection: str="epsg:4326",
50+
pixel_scale: float=0.1,
51+
) -> None:
4552
aoh_shapes = [
4653
Polygon([
4754
(-aoh_radius, aoh_radius),
@@ -64,19 +71,19 @@ def generate_faux_aoh(filename: Path, aoh_radius:float=5.0, range_radius:float=1
6471

6572
assert aoh_area <= range_area
6673

67-
geojson_path = filename.with_suffix('.geojson')
74+
geojson_path = filename.parent / f"range_{filename.stem[4:]}.geojson"
6875
with open(geojson_path, 'w', encoding="UTF-8") as f:
69-
json.dump(geojson_of_shaps(range_shapes), f, indent=2)
76+
json.dump(geojson_of_shapes(range_shapes, projection), f, indent=2)
7077

7178
json_path = filename.with_suffix('.json')
7279
with open(json_path, 'w', encoding='utf-8') as f:
7380
json.dump({'prevalence': aoh_area / range_area}, f)
7481

7582
with tempfile.TemporaryDirectory() as tmpdir:
76-
aoh_geojson = Path(tmpdir) / "test.geojson"
83+
aoh_geojson = Path(tmpdir) / f"aoh_{filename.stem[4:]}.geojson"
7784
with open(aoh_geojson, 'w', encoding="UTF-8") as f:
78-
json.dump(geojson_of_shaps(aoh_shapes), f, indent=2)
79-
with yg.read_shape(aoh_geojson, ("epsg:4326", (0.1, -0.1))) as shape_layer:
85+
json.dump(geojson_of_shapes(aoh_shapes, projection), f, indent=2)
86+
with yg.read_shape(aoh_geojson, (projection, (pixel_scale, -pixel_scale))) as shape_layer:
8087
shape_layer.to_geotiff(filename)
8188

8289
@pytest.mark.parametrize("taxon_id,latitude,longitude,is_valid,expected_outlier",[
@@ -98,7 +105,7 @@ def test_simple_match_in_out_range(
98105
tmpdir_path = Path(tmpdir)
99106

100107
for test_id in [41, 42, 43]:
101-
aoh_path = tmpdir_path / f"{test_id}_RESIDENT.tif"
108+
aoh_path = tmpdir_path / f"aoh_T{test_id}A1_RESIDENT.tif"
102109
generate_faux_aoh(aoh_path)
103110

104111
occurences = generate_occurrence_cluster(latitude, longitude, 20, 2.0)
@@ -135,7 +142,7 @@ def test_bilinear_interprolation_lat(
135142
) -> None:
136143
with tempfile.TemporaryDirectory() as tmpdir:
137144
tmpdir_path = Path(tmpdir)
138-
aoh_path = tmpdir_path / "42_RESIDENT.tif"
145+
aoh_path = tmpdir_path / "aoh_T42A1_RESIDENT.tif"
139146
generate_faux_aoh(aoh_path)
140147

141148
# The AOH by default is 100x100 pixels, in 0.1 degree increments from -5.0 to 5.0
@@ -180,7 +187,7 @@ def test_bilinear_interprolation_lng(
180187
) -> None:
181188
with tempfile.TemporaryDirectory() as tmpdir:
182189
tmpdir_path = Path(tmpdir)
183-
aoh_path = tmpdir_path / "42_RESIDENT.tif"
190+
aoh_path = tmpdir_path / "aoh_T42A1_RESIDENT.tif"
184191
generate_faux_aoh(aoh_path)
185192

186193
# The AOH by default is 100x100 pixels, in 0.1 degree increments from -5.0 to 5.0
@@ -215,7 +222,7 @@ def test_occurrence_prevalence_of_one(
215222
tmpdir_path = Path(tmpdir)
216223

217224
for test_id in [41, 42, 43]:
218-
aoh_path = tmpdir_path / f"{test_id}_RESIDENT.tif"
225+
aoh_path = tmpdir_path / f"aoh_T{test_id}A1_RESIDENT.tif"
219226
generate_faux_aoh(aoh_path, aoh_radius=5.0, range_radius=5.0)
220227

221228
occurences = generate_occurrence_cluster(latitude, longitude, 20, 2.0)
@@ -237,7 +244,7 @@ def test_no_aoh_found() -> None:
237244
tmpdir_path = Path(tmpdir)
238245

239246
for test_id in [41, 42, 43]:
240-
aoh_path = tmpdir_path / f"{test_id}_RESIDENT.tif"
247+
aoh_path = tmpdir_path / f"aoh_T{test_id}A1_RESIDENT.tif"
241248
generate_faux_aoh(aoh_path)
242249

243250
df = pd.DataFrame(
@@ -265,7 +272,7 @@ def test_find_seasonal() -> None:
265272
tmpdir_path = Path(tmpdir)
266273

267274
for season in ['breeding', 'nonbreeding']:
268-
aoh_path = tmpdir_path / f"42_{season}.tif"
275+
aoh_path = tmpdir_path / f"aoh_T42A1a_{season}.tif"
269276
generate_faux_aoh(aoh_path)
270277

271278
df = pd.DataFrame(
@@ -280,3 +287,44 @@ def test_empty_species_list() -> None:
280287
df = pd.DataFrame([], columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude'])
281288
with pytest.raises(ValueError):
282289
_ = process_species(Path("/some/aohs"), Path("/some/aohs"), df)
290+
291+
@pytest.mark.parametrize("taxon_id,latitude,longitude,expected_prev,is_valid,expected_outlier",[
292+
(42, 0.0, 0.0, 1.0, True, False), # all in AoH
293+
(42, 0.0, 20.0, None, False, None), # all out of range
294+
])
295+
def test_aoh_not_in_wgs84(
296+
taxon_id: int,
297+
latitude: float,
298+
longitude: float,
299+
expected_prev: float,
300+
is_valid: bool,
301+
expected_outlier: bool,
302+
) -> None:
303+
with tempfile.TemporaryDirectory() as tmpdir:
304+
tmpdir_path = Path(tmpdir)
305+
306+
for test_id in [41, 42, 43]:
307+
aoh_path = tmpdir_path / f"aoh_T{test_id}A1_RESIDENT.tif"
308+
# Generate an AOH raster and GeoJSON in ESRI:54009
309+
generate_faux_aoh(
310+
aoh_path,
311+
aoh_radius=100000.0,
312+
range_radius=100000.0,
313+
projection="esri:54009",
314+
pixel_scale=100.0
315+
)
316+
317+
# Occurrences are still in EPSG:4326, as that's what GBIF gives us
318+
occurences = generate_occurrence_cluster(latitude, longitude, 20, 0.5)
319+
df = pd.DataFrame(
320+
[(taxon_id, lat, lng) for (lat, lng) in occurences],
321+
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude']
322+
)
323+
324+
result = process_species(tmpdir_path, tmpdir_path, df)
325+
assert result.iucn_taxon_id == taxon_id
326+
assert result.total_records == len(occurences)
327+
assert result.point_prevalence == expected_prev
328+
assert result.model_prevalence == 1.0
329+
assert result.is_valid == is_valid
330+
assert result.is_outlier == expected_outlier

tests/test_species_richness.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,5 @@ def test_simple_summary_gaps() -> None:
115115
[0, 1, 0, 1],
116116
]).astype(float)
117117
expected[expected==0] = np.nan
118-
print(expected)
119118
result_data = result.read_array(0, 0, 4, 4)
120-
print(result_data)
121119
assert np.allclose(result_data, expected, equal_nan=True)

0 commit comments

Comments
 (0)