1111import numpy as np
1212import xarray
1313import xarray .testing
14- from scipy .spatial import ConvexHull
1514from xarray import DataArray
1615
1716from openeo .rest .job import DEFAULT_JOB_RESULTS_FILENAME , BatchJob , JobResults
1817from openeo .util import repr_truncate
1918
2019_log = logging .getLogger (__name__ )
2120
22-
2321_DEFAULT_RTOL = 1e-6
2422_DEFAULT_ATOL = 1e-6
2523
24+ # https://paulbourke.net/dataformats/asciiart
25+ DEFAULT_GRAYSCALE_70_CHARACTERS = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\" ^`'. " [::- 1 ]
26+ DEFAULT_GRAYSCALE_10_CHARACTERS = " .:-=+*#%@"
2627
2728def _load_xarray_netcdf (path : Union [str , Path ], ** kwargs ) -> xarray .Dataset :
2829 """
@@ -91,29 +92,33 @@ def _as_xarray_dataarray(data: Union[str, Path, xarray.DataArray]) -> xarray.Dat
9192 return data
9293
9394
94- def ascii_art (diff_data : DataArray , * , max_width : int = 60 , y_vs_x_aspect_ratio = 2.5 ) -> str :
95+ def _ascii_art (
96+ diff_data : DataArray ,
97+ * ,
98+ max_width : int = 60 ,
99+ y_vs_x_aspect_ratio = 2.5 ,
100+ grayscale_characters : str = DEFAULT_GRAYSCALE_70_CHARACTERS ,
101+ ) -> str :
102+ max_grayscale_idx = len (grayscale_characters ) - 1
95103 x_scale : int = max (1 , int (diff_data .sizes ["x" ] / max_width ))
96- y_scale : int = max (1 , int (diff_data .sizes ["x " ] / (max_width * y_vs_x_aspect_ratio )))
104+ y_scale : int = max (1 , int (diff_data .sizes ["y " ] / (max_width / y_vs_x_aspect_ratio )))
97105 data_max = diff_data .max ().item ()
98106 if data_max == 0 :
99107 data_max = 1
100- grayscale_characters = "$@B%8&WM#*oahkbdpqwmZO0QLCJUYXzcvunxrjft/\|()1{}[]?-_+~<>i!lI;:,\" ^`'. "
101108 coarsened = diff_data .coarsen (dim = {"x" : x_scale , "y" : y_scale }, boundary = "pad" ).mean ()
102109 coarsened = coarsened .transpose ("y" , "x" , ...)
103110 top = "┌" + "─" * coarsened .sizes ["x" ] + "┐\n "
104111 bottom = "\n └" + "─" * coarsened .sizes ["x" ] + "┘"
105112
106- def pixelChar (v ) -> str :
107- if np .isnan (v ):
108- return " "
109- i = int (v * 69 / data_max )
113+ def _pixel_char (v ) -> str :
114+ i = 0 if np .isnan (v ) else int (v * max_grayscale_idx / data_max )
110115 if v > 0 and i == 0 :
111- i = 1
116+ i = 1 # don't show a blank for a difference above the threshold
112117 else :
113- i = min (69 , i )
114- return grayscale_characters [69 - i ]
118+ i = min (max_grayscale_idx , i )
119+ return grayscale_characters [i ]
115120
116- return top + "\n " .join (["│" + "" .join ([pixelChar (v ) for v in row ]) + "│" for row in coarsened ]) + bottom
121+ return top + "\n " .join (["│" + "" .join ([_pixel_char (v ) for v in row ]) + "│" for row in coarsened ]) + bottom
117122
118123
119124def _compare_xarray_dataarray_xy (
@@ -122,96 +127,60 @@ def _compare_xarray_dataarray_xy(
122127 * ,
123128 rtol : float = _DEFAULT_RTOL ,
124129 atol : float = _DEFAULT_ATOL ,
130+ name : str = None ,
125131) -> List [str ]:
126132 """
127- Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
128-
129- Checks that are done (with tolerance):
130- - (optional) Check fraction of mismatching pixels (difference exceeding some tolerance).
131- If fraction is below a given threshold, ignore these mismatches in subsequent comparisons.
132- If fraction is above the threshold, report this issue.
133- - Compare actual and expected data with `xarray.testing.assert_allclose` and specified tolerances.
134-
133+ Additional compare for two compatible spatial xarray DataArrays with tolerance (rtol, atol)
135134 :return: list of issues (empty if no issues)
136135 """
137- # TODO: make this a public function?
138- # TODO: option for nodata fill value?
139- # TODO: option to include data type check?
140- # TODO: option to cast to some data type (or even rescale) before comparison?
141- # TODO: also compare attributes of the DataArray?
142- actual = _as_xarray_dataarray (actual )
143- expected = _as_xarray_dataarray (expected )
144136 issues = []
145-
146- if actual .dims != expected .dims :
147- issues .append (f"Dimension mismatch: { actual .dims } != { expected .dims } " )
148- for dim in sorted (set (expected .dims ).intersection (actual .dims )):
149- acs = actual .coords [dim ].values
150- ecs = expected .coords [dim ].values
151- if not (acs .shape == ecs .shape and (acs == ecs ).all ()):
152- issues .append (f"Coordinates mismatch for dimension { dim !r} : { acs } != { ecs } " )
153- if actual .shape != expected .shape :
154- issues .append (f"Shape mismatch: { actual .shape } != { expected .shape } " )
155-
156- if not issues :
157- threshold = abs (expected * rtol ) + atol
158- diff_exact = abs (expected - actual )
159- diff_mask = diff_exact > threshold
160- diff_lenient = diff_exact .where (diff_mask )
161-
162- non_x_y_dims = list (set (expected .dims ) - {"x" , "y" })
163- value_mapping = dict (map (lambda d : (d , expected [d ].data ), non_x_y_dims ))
164- shape = tuple ([len (value_mapping [x ]) for x in non_x_y_dims ])
165-
166- for shape_index , v in np .ndenumerate (np .ndarray (shape )):
167- indexers = {}
168- for index , value_index in enumerate (shape_index ):
169- indexers [non_x_y_dims [index ]] = value_mapping [non_x_y_dims [index ]][value_index ]
170- diff_data = diff_lenient .sel (indexers = indexers )
171- total_pixel_count = expected .sel (indexers ).count ().item ()
172- diff_pixel_count = diff_data .count ().item ()
173-
174- if diff_pixel_count > 0 :
175- diff_pixel_percentage = round (diff_pixel_count * 100 / total_pixel_count , 1 )
176- diff_mean = round (diff_data .mean ().item (), 2 )
177- diff_var = round (diff_data .var ().item (), 2 )
178-
179- key = "," .join ([f"{ k } { str (v1 )} " for k , v1 in indexers .items ()])
180- issues .append (
181- f"{ key } : value difference exceeds tolerance (rtol { rtol } , atol { atol } ), min:{ diff_data .min ().data } , max: { diff_data .max ().data } , mean: { diff_mean } , var: { diff_var } "
182- )
183-
184- coord_grid = np .meshgrid (diff_data .coords ["x" ], diff_data .coords ["y" ])
185-
186- mask = diff_data .notnull ()
187- if mask .dims [0 ] != "y" :
188- mask = mask .transpose ()
189- c1 = coord_grid [0 ][mask ]
190- c2 = coord_grid [1 ][mask ]
191- coordinates = np .dstack ((c1 , c2 )).reshape (- 1 , 2 )
192-
193- art = ascii_art (diff_data )
194- print (f"Difference ascii art for { key } " )
195- print (art )
196-
197- if len (coordinates ) > 2 :
198- hull = ConvexHull (coordinates )
199- area = hull .volume
200-
201- x_m = diff_data .coords ["x" ][0 ].data
202- x_M = diff_data .coords ["x" ][- 1 ].data
203- y_m = diff_data .coords ["y" ][0 ].data
204- y_M = diff_data .coords ["y" ][- 1 ].data
205-
206- total_area = abs ((y_M - y_m ) * (x_M - x_m ))
207- area_percentage = round (area * 100 / total_area , 1 )
208- issues .append (
209- f"{ key } : differing pixels: { diff_pixel_count } /{ total_pixel_count } ({ diff_pixel_percentage } %), spread over { area_percentage } % of the area"
210- )
211- else :
212- issues .append (
213- f"{ key } : differing pixels: { diff_pixel_count } /{ total_pixel_count } ({ diff_pixel_percentage } %)"
214- )
137+ threshold = abs (expected * rtol ) + atol
138+ diff_exact = abs (expected - actual )
139+ diff_mask = diff_exact > threshold
140+ diff_lenient = diff_exact .where (diff_mask )
141+
142+ non_x_y_dims = list (set (expected .dims ) - {"x" , "y" })
143+ value_mapping = dict (map (lambda d : (d , expected [d ].data ), non_x_y_dims ))
144+ shape = tuple ([len (value_mapping [x ]) for x in non_x_y_dims ])
145+
146+ for shape_index , v in np .ndenumerate (np .ndarray (shape )):
147+ indexers = {}
148+ for index , value_index in enumerate (shape_index ):
149+ indexers [non_x_y_dims [index ]] = value_mapping [non_x_y_dims [index ]][value_index ]
150+ diff_data = diff_lenient .sel (indexers = indexers )
151+ total_pixel_count = expected .sel (indexers ).count ().item ()
152+ diff_pixel_count = diff_data .count ().item ()
153+
154+ if diff_pixel_count > 0 :
155+ diff_pixel_percentage = round (diff_pixel_count * 100 / total_pixel_count , 1 )
156+ diff_mean = round (diff_data .mean ().item (), 2 )
157+ diff_var = round (diff_data .var ().item (), 2 )
158+
159+ key = name + ": " if name else ""
160+ key += "," .join ([f"{ k } { str (v1 )} " for k , v1 in indexers .items ()])
161+ issues .append (
162+ f"{ key } : value difference exceeds tolerance (rtol { rtol } , atol { atol } ), min:{ diff_data .min ().data } , max: { diff_data .max ().data } , mean: { diff_mean } , var: { diff_var } "
163+ )
164+
165+ _log .warning (f"Difference (ascii art) for { key } :\n { _ascii_art (diff_data )} " )
166+
167+ coord_grid = np .meshgrid (diff_data .coords ["x" ], diff_data .coords ["y" ])
168+ mask = diff_data .notnull ()
169+ if mask .dims [0 ] != "y" :
170+ mask = mask .transpose ()
171+ x_coords = coord_grid [0 ][mask ]
172+ y_coords = coord_grid [1 ][mask ]
173+
174+ diff_bbox = ((x_coords .min ().item (), y_coords .min ().item ()), (x_coords .max ().item (), y_coords .max ().item ()))
175+ diff_area = (x_coords .max () - x_coords .min ()) * (y_coords .max () - y_coords .min ())
176+ total_area = abs (
177+ (diff_data .coords ["y" ][- 1 ].data - diff_data .coords ["y" ][0 ].data )
178+ * (diff_data .coords ["x" ][- 1 ].data - diff_data .coords ["x" ][0 ].data )
179+ )
180+ area_percentage = round (diff_area * 100 / total_area , 1 )
181+ issues .append (
182+ f"{ key } : differing pixels: { diff_pixel_count } /{ total_pixel_count } ({ diff_pixel_percentage } %), bbox { diff_bbox } - { area_percentage } % of the area"
183+ )
215184 return issues
216185
217186
@@ -221,6 +190,7 @@ def _compare_xarray_dataarray(
221190 * ,
222191 rtol : float = _DEFAULT_RTOL ,
223192 atol : float = _DEFAULT_ATOL ,
193+ name : str = None ,
224194) -> List [str ]:
225195 """
226196 Compare two xarray DataArrays with tolerance and report mismatch issues (as strings)
@@ -243,7 +213,7 @@ def _compare_xarray_dataarray(
243213 issues = []
244214
245215 # `xarray.testing.assert_allclose` currently does not always
246- # provides detailed information about shape/dimension mismatches
216+ # provide detailed information about shape/dimension mismatches
247217 # so we enrich the issue listing with some more details
248218 if actual .dims != expected .dims :
249219 issues .append (f"Dimension mismatch: { actual .dims } != { expected .dims } " )
@@ -254,17 +224,14 @@ def _compare_xarray_dataarray(
254224 issues .append (f"Coordinates mismatch for dimension { dim !r} : { acs } != { ecs } " )
255225 if actual .shape != expected .shape :
256226 issues .append (f"Shape mismatch: { actual .shape } != { expected .shape } " )
257-
258- if not issues :
259- if {"x" , "y" } <= set (expected .dims ):
260- issues = _compare_xarray_dataarray_xy (actual = actual , expected = expected , rtol = rtol , atol = atol )
261- else :
262- try :
263- xarray .testing .assert_allclose (a = actual , b = expected , rtol = rtol , atol = atol )
264- except AssertionError as e :
265- # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
266- issues .append (str (e ).strip ())
267-
227+ compatible = len (issues ) == 0
228+ try :
229+ xarray .testing .assert_allclose (a = actual , b = expected , rtol = rtol , atol = atol )
230+ except AssertionError as e :
231+ # TODO: message of `assert_allclose` is typically multiline, split it again or make it one line?
232+ issues .append (str (e ).strip ())
233+ if compatible and {"x" , "y" } <= set (expected .dims ):
234+ issues .extend (_compare_xarray_dataarray_xy (actual = actual , expected = expected , rtol = rtol , atol = atol , name = name ))
268235 return issues
269236
270237
@@ -293,32 +260,6 @@ def assert_xarray_dataarray_allclose(
293260 if issues :
294261 raise AssertionError ("\n " .join (issues ))
295262
296-
297- def assert_xarray_dataarray_allclose_xy (
298- actual : Union [xarray .DataArray , str , Path ],
299- expected : Union [xarray .DataArray , str , Path ],
300- * ,
301- rtol : float = _DEFAULT_RTOL ,
302- atol : float = _DEFAULT_ATOL ,
303- ):
304- """
305- Assert that two Xarray ``DataArray`` instances are equal (with tolerance).
306-
307- :param actual: actual data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
308- :param expected: expected or reference data, provided as Xarray DataArray object or path to NetCDF/GeoTIFF file.
309- :param rtol: relative tolerance
310- :param atol: absolute tolerance
311- :raises AssertionError: if not equal within the given tolerance
312-
313- .. versionadded:: 0.31.0
314-
315- .. warning::
316- This function is experimental and subject to change.
317- """
318- issues = _compare_xarray_dataarray_xy (actual = actual , expected = expected , rtol = rtol , atol = atol )
319- if issues :
320- raise AssertionError ("\n " .join (issues ))
321-
322263def _compare_xarray_datasets (
323264 actual : Union [xarray .Dataset , str , Path ],
324265 expected : Union [xarray .Dataset , str , Path ],
@@ -336,15 +277,14 @@ def _compare_xarray_datasets(
336277 expected = _as_xarray_dataset (expected )
337278
338279 all_issues = []
339- # TODO: just leverage DataSet support in xarray.testing.assert_allclose for all this?
340280 actual_vars = set (actual .data_vars )
341281 expected_vars = set (expected .data_vars )
342282 _log .debug (f"_compare_xarray_datasets: actual_vars={ actual_vars !r} expected_vars={ expected_vars !r} " )
343283 if actual_vars != expected_vars :
344284 all_issues .append (f"Xarray DataSet variables mismatch: { actual_vars } != { expected_vars } " )
345285 for var in expected_vars .intersection (actual_vars ):
346286 _log .debug (f"_compare_xarray_datasets: comparing variable { var !r} " )
347- issues = _compare_xarray_dataarray (actual [var ], expected [var ], rtol = rtol , atol = atol )
287+ issues = _compare_xarray_dataarray (actual [var ], expected [var ], rtol = rtol , atol = atol , name = var )
348288 if issues :
349289 all_issues .append (f"Issues for variable { var !r} :" )
350290 all_issues .extend (issues )
@@ -406,10 +346,7 @@ def assert_xarray_allclose(
406346 if isinstance (actual , xarray .Dataset ) and isinstance (expected , xarray .Dataset ):
407347 assert_xarray_dataset_allclose (actual , expected , rtol = rtol , atol = atol )
408348 elif isinstance (actual , xarray .DataArray ) and isinstance (expected , xarray .DataArray ):
409- if {"x" , "y" }.issubset (expected .dims ):
410- assert_xarray_dataarray_allclose_xy (actual , expected , rtol = rtol , atol = atol )
411- else :
412- assert_xarray_dataarray_allclose (actual , expected , rtol = rtol , atol = atol )
349+ assert_xarray_dataarray_allclose (actual , expected , rtol = rtol , atol = atol )
413350 else :
414351 raise ValueError (f"Unsupported types: { type (actual )} and { type (expected )} " )
415352
0 commit comments