Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions lib/ncdata/utils/_compare_nc_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,25 @@ def variable_differences(
], # for some reason, this doesn't always list consistently
)

# shapes
def safe_varshape(var):
    """Return the shape of a variable, or None if it has no data.

    Accepts either kind of variable object handled by the comparison:
    for an NcVariable the shape is read from its ``data`` (which may be
    unset), otherwise the object's own ``shape`` attribute is used.
    """
    if not _isncdata(var):
        # netCDF4.Variable passed: it carries its shape directly
        return var.shape

    # NcVariable passed: allow for NcVariable.data to be empty
    return None if var.data is None else var.data.shape

shape, shape2 = [safe_varshape(v) for v in (v1, v2)]
if shape != shape2:
msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}"
errs.append(msg)

# dtypes
dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)]
if dtype != dtype2:
Expand All @@ -403,8 +422,12 @@ def _is_strtype(dt):

is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
# TODO: is this the correct check to allow comparison between different dtypes?
if check_var_data and dims == dims2 and is_str == is_str2:
# N.B. don't check shapes here: we already checked dimensions.
if (
check_var_data
and dims == dims2
and shape == shape2
and is_str == is_str2
):
# NOTE: no attempt to use laziness here. Could be improved.
def getdata(var):
if _isncdata(var):
Expand Down
45 changes: 45 additions & 0 deletions tests/unit/utils/compare_nc_datasets/test_variable_differences.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,51 @@ def test_signed_unsigned(self, equaldata):
)
check(errs, expected)

@pytest.mark.parametrize("given", ["nodata", "data", "dtype"])
def test_nodata_nodtype(self, given):
    """Compare a variable with neither data nor dtype against variants.

    The second variable is built with nothing extra, with data only, or
    with a dtype only, and the reported differences are checked.
    N.B. comparing 2 variables which both have only a dtype is not covered
    here : see the "nodata_withdtype" test.
    """
    v1 = NcVariable("x")

    # For each parameter value : (extra creation kwargs, expected messages)
    cases = {
        "nodata": ({}, []),
        "data": (
            {"data": [1, 2]},
            [
                'Variable "x" shapes differ : None != (2,)',
                "Variable \"x\" datatypes differ : None != dtype('int64')",
            ],
        ),
        "dtype": (
            {"dtype": np.float32},
            ["Variable \"x\" datatypes differ : None != dtype('float32')"],
        ),
    }
    if given not in cases:
        raise ValueError(f"unrecognised 'given' param : {given!s}")
    kwargs, expected = cases[given]

    v2 = NcVariable("x", **kwargs)
    errs = variable_differences(v1, v2)
    check(errs, expected)

@pytest.mark.parametrize("equality", ["same", "different"])
def test_nodata_withdtype(self, equality):
    """Compare two variables which each have a dtype but no data.

    N.B. the remaining combinations are all covered in the
    "nodata_nodtype" test.
    """
    first_dtype = np.int16
    if equality == "different":
        # Mismatched dtypes : expect a single datatype message
        second_dtype = np.float16
        expected = [
            "Variable \"x\" datatypes differ : dtype('int16') != dtype('float16')"
        ]
    else:
        # Matching dtypes : expect no differences
        second_dtype = first_dtype
        expected = []

    v1 = NcVariable("x", dtype=first_dtype)
    v2 = NcVariable("x", dtype=second_dtype)
    errs = variable_differences(v1, v2)
    check(errs, expected)


class TestDataCheck__controls:
# Note: testing variable comparison via the 'main' public API instead of
Expand Down
Loading