Numpy v2 support (#112)

pp-mo · web-flow · commit d6aaf037cbac · 2025-02-07T17:25:27.000Z
* Make dataset-difference independent of numpy array-printout, hence numpy version.

* unpin numpy in tests

* Add character data difference test.
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
@@ -35,7 +35,7 @@ jobs:
 
       - name: "Install dependencies"
         run: |
-          conda install --yes "numpy<2" pytest pytest-mock iris xarray filelock requests
+          conda install --yes numpy pytest pytest-mock iris xarray filelock requests
 
       - name: "Install *latest* Iris"
         run: |
diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py
@@ -148,6 +148,49 @@ def _attribute_arrays_eq(a1, a2):
     return result
 
 
+def _array_element_str(x):
+    """Make a string representation of a numpy array element (scalar).
+
+    Does *not* rely on numpy array printing.
+    Instead converts to an equivalent Python object, and takes str(that).
+    Hopefully delivers independence of numpy version (a lesson learned the hard
+    way in Iris development !)
+    """
+    if not isinstance(x, np.ndarray) or not hasattr(x.dtype, "kind"):
+        result = str(x)
+    elif np.ma.is_masked(x):
+        result = "masked"
+    else:
+        kind = x.dtype.kind
+        if kind in "iu":
+            result = int(x)
+        elif kind == "f":
+            result = float(x)
+        else:
+            # Strings, and possibly other things.
+            # Not totally clear what other things might occur here.
+            result = str(x)
+        result = str(result)
+    return result
+
+
+def _attribute_str(x):
+    """Make a string representing an attribute value.
+
+    Like the above, not depending on numpy array printing.
+    """
+    if isinstance(x, str):
+        result = f"'{x}'"
+    elif not isinstance(x, np.ndarray):
+        result = str(x)
+    elif x.ndim < 1:
+        result = _array_element_str(x)
+    else:
+        els = [_array_element_str(el) for el in x]
+        result = f"[{', '.join(els)}]"
+    return result
+
+
 def _attribute_differences(
     obj1,
     obj2,
@@ -159,7 +202,7 @@ def _attribute_differences(
     """
     Compare attribute name lists.
 
-    Does not return results, but appends error messages to 'errs'.
+    Return a list of error messages.
     """
     attrnames, attrnames2 = [
         list(obj.attributes.keys()) if _isncdata(obj) else list(obj.ncattrs())
@@ -227,7 +270,7 @@ def fix_orders(attrlist):
                 # N.B. special comparison to handle strings and NaNs
                 msg = (
                     f'{elemname} "{attrname}" attribute values differ : '
-                    f"{attr!r} != {attr2!r}"
+                    f"{_attribute_str(attr)} != {_attribute_str(attr2)}"
                 )
                 errs.append(msg)
     return errs
@@ -404,10 +447,16 @@ def getdata(var):
             diffinds = [
                 np.unravel_index(ind, shape=data.shape) for ind in diffinds
             ]
-            diffinds_str = ", ".join(repr(tuple(x)) for x in diffinds)
+            diffinds_str = ", ".join(
+                str(tuple([int(ind) for ind in x])) for x in diffinds
+            )
             inds_str = f"[{diffinds_str}{ellps}]"
-            points_lhs_str = ", ".join(repr(data[ind]) for ind in diffinds)
-            points_rhs_str = ", ".join(repr(data2[ind]) for ind in diffinds)
+            points_lhs_str = ", ".join(
+                _array_element_str(data[ind]) for ind in diffinds
+            )
+            points_rhs_str = ", ".join(
+                _array_element_str(data2[ind]) for ind in diffinds
+            )
             points_lhs_str = f"[{points_lhs_str}{ellps}]"
             points_rhs_str = f"[{points_rhs_str}{ellps}]"
             msg += (
@@ -435,8 +484,7 @@ def _group_differences(
     """
     Inner routine to compare either whole datasets or subgroups.
 
-    Note that, rather than returning a list of error strings, it appends them to the
-    passed arg `errs`.  This just makes recursive calling easier.
+    Returns a list of error strings.
     """
     errs = []
 
diff --git a/tests/unit/core/test_NcAttribute.py b/tests/unit/core/test_NcAttribute.py
@@ -130,7 +130,9 @@ def test_str(self, datatype, structuretype):
             # All single values appear as scalars.
             value = np.array(value).flatten()[0]
 
-        value_repr = repr(value)
+        value_repr = str(value)
+        if "string" in datatype and not is_multiple:
+            value_repr = f"'{value_repr}'"
 
         is_non_numpy = "custom" in datatype or "none" in datatype
         if is_non_numpy or (is_multiple and "string" not in datatype):
diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__additional.py
@@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([0, 1, 2]) != array([0, 1])"
+                "[0, 1, 2] != [0, 1]"
             )
         ]
 
@@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([1, 2, 3]) != array([  1,   2, 777])"
+                "[1, 2, 3] != [1, 2, 777]"
             )
         ]
 
@@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self):
         assert errs == [
             (
                 '<object attributes> "a" attribute values differ : '
-                "array([1., 2., 3.]) != array([ 1., nan,  3.])"
+                "[1.0, 2.0, 3.0] != [1.0, nan, 3.0]"
             )
         ]
 
diff --git a/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py b/tests/unit/utils/compare_nc_datasets/test_dataset_differences__mainfunctions.py
@@ -270,7 +270,7 @@ def test_value(self, attr_context):
             value_string = "11"
         expected = [
             f'{self.location_string} "att1" attribute values differ : '
-            f"array({value_string}) != array(999)"
+            f"{value_string} != 999"
         ]
         check(errs, expected)
 
diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
@@ -303,3 +303,39 @@ def test_real_and_lazy(self, argtypes):
             "@INDICES[(1,)] : LHS=[1.0], RHS=[2.0]"
         ]
         check(errs, expected)
+
+    @pytest.mark.parametrize(
+        "ndiffs", [0, 1, 2], ids=["no_diffs", "one_diff", "two_diffs"]
+    )
+    def test_string_data(self, ndiffs):
+        # FOR NOW test only with character arrays, encoded as expected ("S1" dtype)
+        strings = ["one", "three", "", "seventeen"]
+        str_len = max(len(x) for x in strings)
+        chararray = np.zeros((4, str_len), dtype="S1")
+        for ind, el in enumerate(strings):
+            chararray[ind, 0 : len(el)] = list(el)
+        self.var1, self.var2 = [
+            NcVariable("vx", ("x"), data=chararray.copy()) for ind in range(2)
+        ]
+
+        if ndiffs > 0:
+            self.var2.data[1, 1] = "X"  # modify one character
+        if ndiffs > 1:
+            self.var2.data[3, 3:] = ""  # (also) cut short this string
+
+        # compare + check results
+        errs = variable_differences(self.var1, self.var2)
+
+        expected = []
+        if ndiffs == 1:
+            expected = [
+                'Variable "vx" data contents differ, at 1 points: '
+                "@INDICES[(1, 1)] : LHS=[b'h'], RHS=[b'X']"
+            ]
+        elif ndiffs == 2:
+            expected = [
+                'Variable "vx" data contents differ, at 7 points: '
+                "@INDICES[(1, 1), (3, 3), ...] : "
+                "LHS=[b'h', b'e', ...], RHS=[b'X', b'', ...]"
+            ]
+        check(errs, expected)

Original file line number	Diff line number	Diff line change
`@@ -257,7 +257,7 @@ def test_compare_attributes_values__data_arrays_shape_mismatch(self):`
`257`	`257`	`assert errs == [`
`258`	`258`	`(`
`259`	`259`	`'<object attributes> "a" attribute values differ : '`
`260`		`- "array([0, 1, 2]) != array([0, 1])"`
	`260`	`+ "[0, 1, 2] != [0, 1]"`
`261`	`261`	`)`
`262`	`262`	`]`
`263`	`263`
`@@ -271,7 +271,7 @@ def test_compare_attributes_values__data_arrays_value_mismatch(self):`
`271`	`271`	`assert errs == [`
`272`	`272`	`(`
`273`	`273`	`'<object attributes> "a" attribute values differ : '`
`274`		`- "array([1, 2, 3]) != array([  1,   2, 777])"`
	`274`	`+ "[1, 2, 3] != [1, 2, 777]"`
`275`	`275`	`)`
`276`	`276`	`]`
`277`	`277`
`@@ -293,7 +293,7 @@ def test_compare_attributes_values__data_arrays_nans_mismatch(self):`
`293`	`293`	`assert errs == [`
`294`	`294`	`(`
`295`	`295`	`'<object attributes> "a" attribute values differ : '`
`296`		`- "array([1., 2., 3.]) != array([ 1., nan,  3.])"`
	`296`	`+ "[1.0, 2.0, 3.0] != [1.0, nan, 3.0]"`
`297`	`297`	`)`
`298`	`298`	`]`
`299`	`299`
Original file line number	Diff line number	Diff line change
`@@ -270,7 +270,7 @@ def test_value(self, attr_context):`
`270`	`270`	`value_string = "11"`
`271`	`271`	`expected = [`
`272`	`272`	`f'{self.location_string} "att1" attribute values differ : '`
`273`		`- f"array({value_string}) != array(999)"`
	`273`	`+ f"{value_string} != 999"`
`274`	`274`	`]`
`275`	`275`	`check(errs, expected)`
`276`	`276`