Skip to content

Commit f544505

Browse files
committed
Fixed tests and made a small logic tweak
1 parent 1500957 commit f544505

File tree

2 files changed

+143
-66
lines changed

2 files changed

+143
-66
lines changed

aoh/validation/validate_occurences.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ def process_species(
5656
clipped_points = gpd.sjoin(points_gdf, species_range, predicate='within', how='inner')
5757

5858
pixel_set = set()
59+
tested = 0
5960
with yg.read_raster(aoh_files[0]) as aoh:
6061
results = []
6162
for _, row in clipped_points.iterrows():
@@ -69,22 +70,36 @@ def process_species(
6970

7071
value = aoh.read_array(pixel_x, pixel_y, 1, 1)
7172
results.append(value[0][0] > 0.0)
73+
tested += 1
7274

7375
# From Dahal et al: "Finally, we excluded species which had fewer than 10 point localities after
7476
# all the filters were applied."
75-
if len(results) < 10:
77+
if tested < 10:
7678
raise ValueError("Not enough occurrences")
7779

7880
matches = len([x for x in results if x])
7981
point_prevalence = matches / len(results)
8082
model_prevalence = aoh_data['prevalence']
83+
84+
# From Dahal et al: "If the point prevalence exceeded model prevalence at
85+
# species level, the AOH maps performed better than random,
86+
# otherwise they were no better than random."
87+
#
88+
# However, note that this means if you have a point prevalence of 1.0 (all
89+
# points match) and a model prevalence of 1.0 (range and AOH match, which
90+
# under the IUCN method is the preferred fallback if we have zero on either
91+
# elevation filtering or habitat filtering), then that would still be marked
92+
# as an outlier (since 1.0 does not exceed 1.0), which seems wrong, so I'm
93+
# special casing that.
94+
is_outlier = (point_prevalence != 1.0) and (point_prevalence < model_prevalence)
95+
8196
return (
8297
taxon_id,
8398
len(results),
8499
matches,
85100
point_prevalence,
86101
model_prevalence,
87-
point_prevalence <= model_prevalence
102+
is_outlier,
88103
)
89104

90105
def process_species_wrapper(
@@ -127,8 +142,8 @@ def validate_occurrences(
127142
"model prevalence",
128143
"outlier",
129144
])
130-
summary = summary[summary.outlier == True]
131-
summary.to_csv(output_path, index=False)
145+
outliers = summary[summary.outlier is True]
146+
outliers.to_csv(output_path, index=False)
132147

133148
def main() -> None:
134149
parser = argparse.ArgumentParser(description="Validate map prevalence.")

tests/test_occurences.py

Lines changed: 124 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import math
23
import tempfile
34
from pathlib import Path
45

@@ -9,16 +10,22 @@
910

1011
from aoh.validation.validate_occurences import process_species
1112

12-
def test_empty_species_list() -> None:
13-
df = pd.DataFrame([], columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude'])
14-
res = process_species(Path("/some/aohs"), Path("/some/aohs"), df)
15-
assert res is None
16-
17-
def generate_faux_aoh(filename: Path, shape: Polygon | None = None) -> None:
18-
shapes = [
19-
shape if shape is not None else Polygon([(0, 0), (0, 10), (10, 10), (10, 0)])
20-
]
21-
13+
def generate_occurrence_cluster(
14+
latitude: float,
15+
longitude: float,
16+
count: int,
17+
radius: float,
18+
) -> list[tuple[float,float]]:
19+
res = [(latitude, longitude)]
20+
rotation = (math.pi * 2) / (count - 1)
21+
for i in range(count - 1):
22+
angle = i * rotation
23+
x = radius * math.cos(angle)
24+
y = radius * math.sin(angle)
25+
res.append((latitude + y, longitude + x))
26+
return res
27+
28+
def geojson_of_shaps(shapes):
2229
features = []
2330
for geom in shapes:
2431
feature = {
@@ -32,59 +39,128 @@ def generate_faux_aoh(filename: Path, shape: Polygon | None = None) -> None:
3239
"type": "FeatureCollection",
3340
"features": features
3441
}
42+
return geojson
43+
44+
def generate_faux_aoh(filename: Path, aoh_radius:float=5.0, range_radius:float=10.0) -> None:
45+
aoh_shapes = [
46+
Polygon([
47+
(-aoh_radius, aoh_radius),
48+
(aoh_radius, aoh_radius),
49+
(aoh_radius, -aoh_radius),
50+
(-aoh_radius, -aoh_radius)
51+
])
52+
]
53+
aoh_area = sum(x.area for x in aoh_shapes)
54+
55+
range_shapes = [
56+
Polygon([
57+
(-range_radius, range_radius),
58+
(range_radius, range_radius),
59+
(range_radius, -range_radius),
60+
(-range_radius, -range_radius)
61+
])
62+
]
63+
range_area = sum(x.area for x in range_shapes)
64+
65+
assert aoh_area <= range_area
3566

3667
geojson_path = filename.with_suffix('.geojson')
3768
with open(geojson_path, 'w', encoding="UTF-8") as f:
38-
json.dump(geojson, f, indent=2)
69+
json.dump(geojson_of_shaps(range_shapes), f, indent=2)
3970

4071
json_path = filename.with_suffix('.json')
4172
with open(json_path, 'w', encoding='utf-8') as f:
42-
json.dump({'prevalence': 1.0}, f)
73+
json.dump({'prevalence': aoh_area / range_area}, f)
4374

44-
with yg.read_shape(geojson_path, ("epsg:4326", (1.0, -1.0))) as shape_layer:
45-
shape_layer.to_geotiff(filename)
46-
47-
@pytest.mark.parametrize("taxon_id,latitude,longitude,expected",[
48-
(42, 5.0, 5.0, True),
49-
(42, 12.0, 12.0, False),
75+
with tempfile.TemporaryDirectory() as tmpdir:
76+
aoh_geojson = Path(tmpdir) / "test.geojson"
77+
with open(aoh_geojson, 'w', encoding="UTF-8") as f:
78+
json.dump(geojson_of_shaps(aoh_shapes), f, indent=2)
79+
with yg.read_shape(aoh_geojson, ("epsg:4326", (0.1, -0.1))) as shape_layer:
80+
shape_layer.to_geotiff(filename)
81+
82+
@pytest.mark.parametrize("taxon_id,latitude,longitude,expected_reject,expected_outlier",[
83+
(42, 0.0, 0.0, False, False), # all in AoH
84+
(42, 0.0, 4.0, False, False), # Most in AOH, a few in range
85+
(42, 0.0, 6.5, False, True), # Most in range, a few in AOH
86+
(42, 0.0, 7.5, False, True), # all in range but not AOH
87+
(42, 0.0, 11.0, True, None), # most out of range
88+
(42, 0.0, 20.0, True, None), # all out of range
5089
])
51-
def test_simple_match(taxon_id: int, latitude: float, longitude: float, expected: bool) -> None:
90+
def test_simple_match_in_out_range(
91+
taxon_id: int,
92+
latitude: float,
93+
longitude: float,
94+
expected_reject: bool,
95+
expected_outlier: bool,
96+
) -> None:
5297
with tempfile.TemporaryDirectory() as tmpdir:
5398
tmpdir_path = Path(tmpdir)
5499

55100
for test_id in [41, 42, 43]:
56101
aoh_path = tmpdir_path / f"{test_id}.tif"
57102
generate_faux_aoh(aoh_path)
58103

104+
occurences = generate_occurrence_cluster(latitude, longitude, 20, 2.0)
59105
df = pd.DataFrame(
60-
[(taxon_id, latitude, longitude)],
106+
[(taxon_id, lat, lng) for (lat, lng) in occurences],
61107
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude']
62108
)
63-
res = process_species(tmpdir_path, tmpdir_path, df)
64109

65-
id_no, results, matches, point_prev, model_prev, outlier = res
66-
assert id_no == taxon_id
67-
assert results == 1
68-
assert matches == (1 if expected else 0)
69-
assert model_prev == 1.0
70-
assert outlier == expected
71-
72-
def test_no_aoh_found(taxon_id: int, latitude: float, longitude: float, expected: bool) -> None:
110+
if not expected_reject:
111+
res = process_species(tmpdir_path, tmpdir_path, df)
112+
id_no, results, _matches, _point_prev, _model_prev, outlier = res
113+
print(res)
114+
assert id_no == taxon_id
115+
assert results == len(occurences)
116+
assert outlier == expected_outlier
117+
else:
118+
with pytest.raises(ValueError):
119+
_ = process_species(tmpdir_path, tmpdir_path, df)
120+
121+
@pytest.mark.parametrize("taxon_id,latitude,longitude,expected_prev,expected_reject,expected_outlier",[
122+
(42, 0.0, 0.0, 1.0, False, False), # all in AoH
123+
# (42, 0.0, 4.0, False, False), # Most in AOH, a few in range
124+
# (42, 0.0, 6.5, False, True), # Most in range, a few in AOH
125+
# (42, 0.0, 7.5, False, True), # all in range but not AOH
126+
# (42, 0.0, 11.0, True, None), # most out of range
127+
(42, 0.0, 20.0, 0.0, True, None), # all out of range
128+
])
129+
def test_model_prevalence_of_one(
130+
taxon_id: int,
131+
latitude: float,
132+
longitude: float,
133+
expected_prev: float,
134+
expected_reject: bool,
135+
expected_outlier: bool,
136+
) -> None:
73137
with tempfile.TemporaryDirectory() as tmpdir:
74138
tmpdir_path = Path(tmpdir)
75139

76140
for test_id in [41, 42, 43]:
77141
aoh_path = tmpdir_path / f"{test_id}.tif"
78-
generate_faux_aoh(aoh_path)
142+
generate_faux_aoh(aoh_path, aoh_radius=5.0, range_radius=5.0)
79143

144+
occurences = generate_occurrence_cluster(latitude, longitude, 20, 2.0)
80145
df = pd.DataFrame(
81-
[(40, 5.0, 5.0)],
146+
[(taxon_id, lat, lng) for (lat, lng) in occurences],
82147
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude']
83148
)
84-
with pytest.raises(FileNotFoundError):
85-
_ = process_species(tmpdir_path, tmpdir_path, df)
86149

87-
def test_multiple_match() -> None:
150+
if not expected_reject:
151+
res = process_species(tmpdir_path, tmpdir_path, df)
152+
id_no, results, _matches, point_prev, model_prev, outlier = res
153+
print(res)
154+
assert id_no == taxon_id
155+
assert results == len(occurences)
156+
assert point_prev == expected_prev
157+
assert model_prev == 1.0
158+
assert outlier == expected_outlier
159+
else:
160+
with pytest.raises(ValueError):
161+
_ = process_species(tmpdir_path, tmpdir_path, df)
162+
163+
def test_no_aoh_found() -> None:
88164
with tempfile.TemporaryDirectory() as tmpdir:
89165
tmpdir_path = Path(tmpdir)
90166

@@ -93,17 +169,11 @@ def test_multiple_match() -> None:
93169
generate_faux_aoh(aoh_path)
94170

95171
df = pd.DataFrame(
96-
[
97-
(42, 5.0, 5.0, True),
98-
(42, 12.0, 12.0, False),
99-
],
100-
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude', 'expected']
172+
[(40, 5.0, 5.0)],
173+
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude']
101174
)
102-
103-
res = process_species(tmpdir_path, tmpdir_path, df)
104-
105-
assert len(res) == len(df)
106-
assert (res.occurence == res.expected).all()
175+
with pytest.raises(FileNotFoundError):
176+
_ = process_species(tmpdir_path, tmpdir_path, df)
107177

108178
def test_too_many_ids() -> None:
109179
df = pd.DataFrame(
@@ -118,31 +188,23 @@ def test_too_many_ids() -> None:
118188
with pytest.raises(ValueError):
119189
_ = process_species(Path("/some/aohs"), Path("/some/aohs"), df)
120190

121-
@pytest.mark.parametrize("taxon_id,latitude,longitude,expected",[
122-
(42, 5.0, 5.0, True),
123-
(42, -5.0, -5.0, True),
124-
(42, 5.0, -5.0, False),
125-
(42, -5.0, 5.0, False),
126-
(40, 5.0, 5.0, False),
127-
])
128-
def test_find_seasonal(taxon_id: int, latitude: float, longitude: float, expected: bool) -> None:
191+
def test_find_seasonal() -> None:
129192
with tempfile.TemporaryDirectory() as tmpdir:
130193
tmpdir_path = Path(tmpdir)
131194

132-
for season, shape in [
133-
('breeding', Polygon([(0, 0), (0, 10), (10, 10), (10, 0)])),
134-
('nonbreeding', Polygon([(0, 0), (0, -10), (-10, -10), (-10, 0)])),
135-
]:
195+
for season in ['breeding', 'nonbreeding']:
136196
aoh_path = tmpdir_path / f"42_{season}.tif"
137-
generate_faux_aoh(aoh_path, shape)
197+
generate_faux_aoh(aoh_path)
138198

139199
df = pd.DataFrame(
140-
[(taxon_id, latitude, longitude)],
200+
[(42, 5.0, 5.0)],
141201
columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude']
142202
)
143203

144-
res = process_species(tmpdir_path, tmpdir_path, df)
204+
with pytest.raises(RuntimeError):
205+
_ = process_species(tmpdir_path, tmpdir_path, df)
145206

146-
assert len(res) == len(df)
147-
occurence = res.occurence[0]
148-
assert occurence == expected
207+
def test_empty_species_list() -> None:
208+
df = pd.DataFrame([], columns=['iucn_taxon_id', 'decimalLatitude', 'decimalLongitude'])
209+
with pytest.raises(ValueError):
210+
_ = process_species(Path("/some/aohs"), Path("/some/aohs"), df)

0 commit comments

Comments (0)