Skip to content

Commit d6aaf03

Browse files
authored
Numpy v2 support (#112)
* Make dataset-difference independent of numpy array-printout, hence numpy version. * unpin numpy in tests * Add character data difference test.
1 parent 70f9250 commit d6aaf03

File tree

6 files changed

+99
-13
lines changed

6 files changed

+99
-13
lines changed

.github/workflows/ci-tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ jobs:
3535

3636
- name: "Install dependencies"
3737
run: |
38-
conda install --yes "numpy<2" pytest pytest-mock iris xarray filelock requests
38+
conda install --yes numpy pytest pytest-mock iris xarray filelock requests
3939
4040
- name: "Install *latest* Iris"
4141
run: |

lib/ncdata/utils/_compare_nc_datasets.py

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,49 @@ def _attribute_arrays_eq(a1, a2):
148148
return result
149149

150150

151+
def _array_element_str(x):
152+
"""Make a string representation of a numpy array element (scalar).
153+
154+
Does *not* rely on numpy array printing.
155+
Instead converts to an equivalent Python object, and takes str(that).
156+
Hopefully delivers independence of numpy version (a lesson learned the hard way
157+
way in Iris development !)
158+
"""
159+
if not isinstance(x, np.ndarray) or not hasattr(x.dtype, "kind"):
160+
result = str(x)
161+
elif np.ma.is_masked(x):
162+
result = "masked"
163+
else:
164+
kind = x.dtype.kind
165+
if kind in "iu":
166+
result = int(x)
167+
elif kind == "f":
168+
result = float(x)
169+
else:
170+
# Strings, and possibly other things.
171+
# Not totally clear what other things might occur here.
172+
result = str(x)
173+
result = str(result)
174+
return result
175+
176+
177+
def _attribute_str(x):
178+
"""Make a string representing an attribute value.
179+
180+
Like the above, not depending on numpy array printing.
181+
"""
182+
if isinstance(x, str):
183+
result = f"'{x}'"
184+
elif not isinstance(x, np.ndarray):
185+
result = str(x)
186+
elif x.ndim < 1:
187+
result = _array_element_str(x)
188+
else:
189+
els = [_array_element_str(el) for el in x]
190+
result = f"[{', '.join(els)}]"
191+
return result
192+
193+
151194
def _attribute_differences(
152195
obj1,
153196
obj2,
@@ -159,7 +202,7 @@ def _attribute_differences(
159202
"""
160203
Compare attribute name lists.
161204
162-
Does not return results, but appends error messages to 'errs'.
205+
Return a list of error messages.
163206
"""
164207
attrnames, attrnames2 = [
165208
list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs())
@@ -227,7 +270,7 @@ def fix_orders(attrlist):
227270
# N.B. special comparison to handle strings and NaNs
228271
msg = (
229272
f'{elemname} "{attrname}" attribute values differ : '
230-
f"{attr!r} != {attr2!r}"
273+
f"{_attribute_str(attr)} != {_attribute_str(attr2)}"
231274
)
232275
errs.append(msg)
233276
return errs
@@ -404,10 +447,16 @@ def getdata(var):
404447
diffinds = [
405448
np.unravel_index(ind, shape=data.shape) for ind in diffinds
406449
]
407-
diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds)
450+
diffinds_str = ", ".join(
451+
str(tuple([int(ind) for ind in x])) for x in diffinds
452+
)
408453
inds_str = f"[{diffinds_str}{ellps}]"
409-
points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds)
410-
points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds)
454+
points_lhs_str = ", ".join(
455+
_array_element_str(data[ind]) for ind in diffinds
456+
)
457+
points_rhs_str = ", ".join(
458+
_array_element_str(data2[ind]) for ind in diffinds
459+
)
411460
points_lhs_str = f"[{points_lhs_str}{ellps}]"
412461
points_rhs_str = f"[{points_rhs_str}{ellps}]"
413462
msg += (
@@ -435,8 +484,7 @@ def _group_differences(
435484
"""
436485
Inner routine to compare either whole datasets or subgroups.
437486
438-
Note that, rather than returning a list of error strings, it appends them to the
439-
passed arg `errs`. This just makes recursive calling easier.
487+
Returns a list of error strings.
440488
"""
441489
errs = []
442490

tests/unit/core/test_NcAttribute.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,9 @@ def test_str(self, datatype, structuretype):
130130
# All single values appear as scalars.
131131
value = np.array(value).flatten()[0]
132132

133-
value_repr = repr(value)
133+
value_repr = str(value)
134+
if "string" in datatype and not is_multiple:
135+
value_repr = f"'{value_repr}'"
134136

135137
is_non_numpy = "custom" in datatype or "none" in datatype
136138
if is_non_numpy or (is_multiple and "string" not in datatype):

tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self):
257257
assert errs == [
258258
(
259259
'<object attributes> "a" attribute values differ : '
260-
"array([0, 1, 2]) != array([0, 1])"
260+
"[0, 1, 2] != [0, 1]"
261261
)
262262
]
263263

@@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self):
271271
assert errs == [
272272
(
273273
'<object attributes> "a" attribute values differ : '
274-
"array([1, 2, 3]) != array([ 1, 2, 777])"
274+
"[1, 2, 3] != [1, 2, 777]"
275275
)
276276
]
277277

@@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self):
293293
assert errs == [
294294
(
295295
'<object attributes> "a" attribute values differ : '
296-
"array([1., 2., 3.]) != array([ 1., nan, 3.])"
296+
"[1.0, 2.0, 3.0] != [1.0, nan, 3.0]"
297297
)
298298
]
299299

tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ def test_value(self, attr_context):
270270
value_string = "11"
271271
expected = [
272272
f'{self.location_string} "att1" attribute values differ : '
273-
f"array({value_string}) != array(999)"
273+
f"{value_string} != 999"
274274
]
275275
check(errs, expected)
276276

tests/unit/utils/compare_nc_datasets/test_variable_differences.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,3 +303,39 @@ def test_real_and_lazy(self, argtypes):
303303
"@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]"
304304
]
305305
check(errs, expected)
306+
307+
@pytest.mark.parametrize(
    "ndiffs", [0, 1, 2], ids=["no_diffs", "one_diff", "two_diffs"]
)
def test_string_data(self, ndiffs):
    """Check difference reporting for character ("S1") array data.

    Two variables are built from identical character arrays, then zero, one
    or two modifications are applied to the second before comparing.
    """
    # FOR NOW test only with character arrays, encoded as expected ("S1" dtype)
    words = ["one", "three", "", "seventeen"]
    width = max(map(len, words))
    chars = np.zeros((4, width), dtype="S1")
    for row, word in enumerate(words):
        # Fill the leading characters of each row; the rest stay empty.
        chars[row, : len(word)] = list(word)

    # Each variable gets its own copy, so changing one leaves the other intact.
    self.var1 = NcVariable("vx", "x", data=chars.copy())
    self.var2 = NcVariable("vx", "x", data=chars.copy())

    if ndiffs >= 1:
        self.var2.data[1, 1] = "X"  # modify one character
    if ndiffs >= 2:
        self.var2.data[3, 3:] = ""  # (also) cut short this string

    # compare + check results
    errs = variable_differences(self.var1, self.var2)

    expected_by_ndiffs = {
        1: [
            'Variable "vx" data contents differ, at 1 points: '
            "@INDICES[(1, 1)] : LHS=[b'h'], RHS=[b'X']"
        ],
        2: [
            'Variable "vx" data contents differ, at 7 points: '
            "@INDICES[(1, 1), (3, 3), ...] : "
            "LHS=[b'h', b'e', ...], RHS=[b'X', b'', ...]"
        ],
    }
    expected = expected_by_ndiffs.get(ndiffs, [])
    check(errs, expected)

0 commit comments

Comments
 (0)