Skip to content

Commit fcad19e

Browse files
committed
Issue #761 better diff for apex reference check (replaced convex hull with bbox, _compare_xarray_dataarray_xy now only adds to the original xarray implementation)
1 parent 5bcc5e7 commit fcad19e

File tree

3 files changed

+100
-147
lines changed

3 files changed

+100
-147
lines changed

openeo/testing/results.py

Lines changed: 79 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,19 @@
1111
import numpy as np
1212
import xarray
1313
import xarray.testing
14-
from scipy.spatial import ConvexHull
1514
from xarray import DataArray
1615

1716
from openeo.rest.job import DEFAULT_JOB_RESULTS_FILENAME, BatchJob, JobResults
1817
from openeo.util import repr_truncate
1918

2019
_log = logging.getLogger(__name__)
2120

22-
2321
_DEFAULT_RTOL = 1e-6
2422
_DEFAULT_ATOL = 1e-6
2523

24+
# https://paulbourke.net/dataformats/asciiart
25+
DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "[::-1]
26+
DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@"
2627

2728
def _load_xarray_netcdf(path: Union[str, Path], **kwargs) -> xarray.Dataset:
2829
"""
@@ -91,29 +92,33 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
9192
return data
9293

9394

94-
def ascii_art(diff_data: DataArray, *, max_width: int = 60, y_vs_x_aspect_ratio=2.5) -> str:
95+
def _ascii_art(
96+
diff_data: DataArray,
97+
*,
98+
max_width: int = 60,
99+
y_vs_x_aspect_ratio=2.5,
100+
grayscale_characters: str = DEFAULT_GRAYSCALE_70_CHARACTERS,
101+
) -> str:
102+
max_grayscale_idx = len(grayscale_characters) - 1
95103
x_scale: int = max(1, int(diff_data.sizes["x"] / max_width))
96-
y_scale: int = max(1, int(diff_data.sizes["x"] / (max_width * y_vs_x_aspect_ratio)))
104+
y_scale: int = max(1, int(diff_data.sizes["y"] / (max_width / y_vs_x_aspect_ratio)))
97105
data_max = diff_data.max().item()
98106
if data_max == 0:
99107
data_max = 1
100-
grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\"^`'. "
101108
coarsened = diff_data.coarsen(dim={"x": x_scale, "y": y_scale}, boundary="pad").mean()
102109
coarsened = coarsened.transpose("y", "x", ...)
103110
top = "┌" + "─" * coarsened.sizes["x"] + "┐\n"
104111
bottom = "\n└" + "─" * coarsened.sizes["x"] + "┘"
105112

106-
def pixelChar(v) -> str:
107-
if np.isnan(v):
108-
return " "
109-
i = int(v * 69 / data_max)
113+
def _pixel_char(v) -> str:
114+
i = 0 if np.isnan(v) else int(v * max_grayscale_idx / data_max)
110115
if v > 0 and i == 0:
111-
i = 1
116+
i = 1 # don't show a blank for a difference above the threshold
112117
else:
113-
i = min(69, i)
114-
return grayscale_characters[69 - i]
118+
i = min(max_grayscale_idx, i)
119+
return grayscale_characters[i]
115120

116-
return top + "\n".join(["│" + "".join([pixelChar(v) for v in row]) + "│" for row in coarsened]) + bottom
121+
return top + "\n".join(["│" + "".join([_pixel_char(v) for v in row]) + "│" for row in coarsened]) + bottom
117122

118123

119124
def _compare_xarray_dataarray_xy(
@@ -122,96 +127,60 @@ def _compare_xarray_dataarray_xy(
122127
*,
123128
rtol: float = _DEFAULT_RTOL,
124129
atol: float = _DEFAULT_ATOL,
130+
name: str = None,
125131
) -> List[str]:
126132
"""
127-
Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
128-
129-
Checks that are done (with tolerance):
130-
- (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
131-
If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
132-
If fraction is above the threshold, report this issue.
133-
- Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
134-
133+
Additional compare for two compatible spatial xarray DataArrays with tolerance (rtol, atol)
135134
:return: list of issues (empty if no issues)
136135
"""
137-
# TODO: make this a public function?
138-
# TODO: option for nodata fill value?
139-
# TODO: option to include data type check?
140-
# TODO: option to cast to some data type (or even rescale) before comparison?
141-
# TODO: also compare attributes of the DataArray?
142-
actual = _as_xarray_dataarray(actual)
143-
expected = _as_xarray_dataarray(expected)
144136
issues = []
145-
146-
if actual.dims != expected.dims:
147-
issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
148-
for dim in sorted(set(expected.dims).intersection(actual.dims)):
149-
acs = actual.coords[dim].values
150-
ecs = expected.coords[dim].values
151-
if not (acs.shape == ecs.shape and (acs == ecs).all()):
152-
issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
153-
if actual.shape != expected.shape:
154-
issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
155-
156-
if not issues:
157-
threshold = abs(expected * rtol) + atol
158-
diff_exact = abs(expected - actual)
159-
diff_mask = diff_exact > threshold
160-
diff_lenient = diff_exact.where(diff_mask)
161-
162-
non_x_y_dims = list(set(expected.dims) - {"x", "y"})
163-
value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
164-
shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
165-
166-
for shape_index, v in np.ndenumerate(np.ndarray(shape)):
167-
indexers = {}
168-
for index, value_index in enumerate(shape_index):
169-
indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
170-
diff_data = diff_lenient.sel(indexers=indexers)
171-
total_pixel_count = expected.sel(indexers).count().item()
172-
diff_pixel_count = diff_data.count().item()
173-
174-
if diff_pixel_count > 0:
175-
diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
176-
diff_mean = round(diff_data.mean().item(), 2)
177-
diff_var = round(diff_data.var().item(), 2)
178-
179-
key = ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
180-
issues.append(
181-
f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
182-
)
183-
184-
coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
185-
186-
mask = diff_data.notnull()
187-
if mask.dims[0] != "y":
188-
mask = mask.transpose()
189-
c1 = coord_grid[0][mask]
190-
c2 = coord_grid[1][mask]
191-
coordinates = np.dstack((c1, c2)).reshape(-1, 2)
192-
193-
art = ascii_art(diff_data)
194-
print(f"Difference ascii art for {key}")
195-
print(art)
196-
197-
if len(coordinates) > 2:
198-
hull = ConvexHull(coordinates)
199-
area = hull.volume
200-
201-
x_m = diff_data.coords["x"][0].data
202-
x_M = diff_data.coords["x"][-1].data
203-
y_m = diff_data.coords["y"][0].data
204-
y_M = diff_data.coords["y"][-1].data
205-
206-
total_area = abs((y_M - y_m) * (x_M - x_m))
207-
area_percentage = round(area * 100 / total_area, 1)
208-
issues.append(
209-
f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), spread over {area_percentage}% of the area"
210-
)
211-
else:
212-
issues.append(
213-
f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%)"
214-
)
137+
threshold = abs(expected * rtol) + atol
138+
diff_exact = abs(expected - actual)
139+
diff_mask = diff_exact > threshold
140+
diff_lenient = diff_exact.where(diff_mask)
141+
142+
non_x_y_dims = list(set(expected.dims) - {"x", "y"})
143+
value_mapping = dict(map(lambda d: (d, expected[d].data), non_x_y_dims))
144+
shape = tuple([len(value_mapping[x]) for x in non_x_y_dims])
145+
146+
for shape_index, v in np.ndenumerate(np.ndarray(shape)):
147+
indexers = {}
148+
for index, value_index in enumerate(shape_index):
149+
indexers[non_x_y_dims[index]] = value_mapping[non_x_y_dims[index]][value_index]
150+
diff_data = diff_lenient.sel(indexers=indexers)
151+
total_pixel_count = expected.sel(indexers).count().item()
152+
diff_pixel_count = diff_data.count().item()
153+
154+
if diff_pixel_count > 0:
155+
diff_pixel_percentage = round(diff_pixel_count * 100 / total_pixel_count, 1)
156+
diff_mean = round(diff_data.mean().item(), 2)
157+
diff_var = round(diff_data.var().item(), 2)
158+
159+
key = name + ": " if name else ""
160+
key += ",".join([f"{k} {str(v1)}" for k, v1 in indexers.items()])
161+
issues.append(
162+
f"{key}: value difference exceeds tolerance (rtol {rtol}, atol {atol}), min:{diff_data.min().data}, max: {diff_data.max().data}, mean: {diff_mean}, var: {diff_var}"
163+
)
164+
165+
_log.warning(f"Difference (ascii art) for {key}:\n{_ascii_art(diff_data)}")
166+
167+
coord_grid = np.meshgrid(diff_data.coords["x"], diff_data.coords["y"])
168+
mask = diff_data.notnull()
169+
if mask.dims[0] != "y":
170+
mask = mask.transpose()
171+
x_coords = coord_grid[0][mask]
172+
y_coords = coord_grid[1][mask]
173+
174+
diff_bbox = ((x_coords.min().item(), y_coords.min().item()), (x_coords.max().item(), y_coords.max().item()))
175+
diff_area = (x_coords.max() - x_coords.min()) * (y_coords.max() - y_coords.min())
176+
total_area = abs(
177+
(diff_data.coords["y"][-1].data - diff_data.coords["y"][0].data)
178+
* (diff_data.coords["x"][-1].data - diff_data.coords["x"][0].data)
179+
)
180+
area_percentage = round(diff_area * 100 / total_area, 1)
181+
issues.append(
182+
f"{key}: differing pixels: {diff_pixel_count}/{total_pixel_count} ({diff_pixel_percentage}%), bbox {diff_bbox} - {area_percentage}% of the area"
183+
)
215184
return issues
216185

217186

@@ -221,6 +190,7 @@ def _compare_xarray_dataarray(
221190
*,
222191
rtol: float = _DEFAULT_RTOL,
223192
atol: float = _DEFAULT_ATOL,
193+
name: str = None,
224194
) -> List[str]:
225195
"""
226196
Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
@@ -243,7 +213,7 @@ def _compare_xarray_dataarray(
243213
issues = []
244214

245215
# `xarray.testing.assert_allclose` currently does not always
246-
# provides detailed information about shape/dimension mismatches
216+
# provide detailed information about shape/dimension mismatches
247217
# so we enrich the issue listing with some more details
248218
if actual.dims != expected.dims:
249219
issues.append(f"Dimension mismatch: {actual.dims} != {expected.dims}")
@@ -254,17 +224,14 @@ def _compare_xarray_dataarray(
254224
issues.append(f"Coordinates mismatch for dimension {dim!r}: {acs} != {ecs}")
255225
if actual.shape != expected.shape:
256226
issues.append(f"Shape mismatch: {actual.shape} != {expected.shape}")
257-
258-
if not issues:
259-
if {"x", "y"} <= set(expected.dims):
260-
issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
261-
else:
262-
try:
263-
xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
264-
except AssertionError as e:
265-
# TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
266-
issues.append(str(e).strip())
267-
227+
compatible = len(issues) == 0
228+
try:
229+
xarray.testing.assert_allclose(a=actual, b=expected, rtol=rtol, atol=atol)
230+
except AssertionError as e:
231+
# TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
232+
issues.append(str(e).strip())
233+
if compatible and {"x", "y"} <= set(expected.dims):
234+
issues.extend(_compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol, name=name))
268235
return issues
269236

270237

@@ -293,32 +260,6 @@ def assert_xarray_dataarray_allclose(
293260
if issues:
294261
raise AssertionError("\n".join(issues))
295262

296-
297-
def assert_xarray_dataarray_allclose_xy(
298-
actual: Union[xarray.DataArray, str, Path],
299-
expected: Union[xarray.DataArray, str, Path],
300-
*,
301-
rtol: float = _DEFAULT_RTOL,
302-
atol: float = _DEFAULT_ATOL,
303-
):
304-
"""
305-
Assert that two Xarray ``DataArray`` instances are equal (with tolerance).
306-
307-
:param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
308-
:param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
309-
:param rtol: relative tolerance
310-
:param atol: absolute tolerance
311-
:raises AssertionError: if not equal within the given tolerance
312-
313-
.. versionadded:: 0.31.0
314-
315-
.. warning::
316-
This function is experimental and subject to change.
317-
"""
318-
issues = _compare_xarray_dataarray_xy(actual=actual, expected=expected, rtol=rtol, atol=atol)
319-
if issues:
320-
raise AssertionError("\n".join(issues))
321-
322263
def _compare_xarray_datasets(
323264
actual: Union[xarray.Dataset, str, Path],
324265
expected: Union[xarray.Dataset, str, Path],
@@ -336,15 +277,14 @@ def _compare_xarray_datasets(
336277
expected = _as_xarray_dataset(expected)
337278

338279
all_issues = []
339-
# TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this?
340280
actual_vars = set(actual.data_vars)
341281
expected_vars = set(expected.data_vars)
342282
_log.debug(f"_compare_xarray_datasets: actual_vars={actual_vars!r} expected_vars={expected_vars!r}")
343283
if actual_vars != expected_vars:
344284
all_issues.append(f"Xarray DataSet variables mismatch: {actual_vars} != {expected_vars}")
345285
for var in expected_vars.intersection(actual_vars):
346286
_log.debug(f"_compare_xarray_datasets: comparing variable {var!r}")
347-
issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol)
287+
issues = _compare_xarray_dataarray(actual[var], expected[var], rtol=rtol, atol=atol, name=var)
348288
if issues:
349289
all_issues.append(f"Issues for variable {var!r}:")
350290
all_issues.extend(issues)
@@ -406,10 +346,7 @@ def assert_xarray_allclose(
406346
if isinstance(actual, xarray.Dataset) and isinstance(expected, xarray.Dataset):
407347
assert_xarray_dataset_allclose(actual, expected, rtol=rtol, atol=atol)
408348
elif isinstance(actual, xarray.DataArray) and isinstance(expected, xarray.DataArray):
409-
if {"x", "y"}.issubset(expected.dims):
410-
assert_xarray_dataarray_allclose_xy(actual, expected, rtol=rtol, atol=atol)
411-
else:
412-
assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
349+
assert_xarray_dataarray_allclose(actual, expected, rtol=rtol, atol=atol)
413350
else:
414351
raise ValueError(f"Unsupported types: {type(actual)} and {type(expected)}")
415352

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,6 @@
8282
"deprecated>=1.2.12",
8383
'oschmod>=0.3.12; sys_platform == "win32"',
8484
"importlib_resources; python_version<'3.9'",
85-
"scipy", # for Convex Hull algorithm
8685
],
8786
extras_require={
8887
"tests": tests_require,

0 commit comments

Comments
 (0)