Skip to content

Commit cbc425d

Browse files
Pre-check var shapes in dataset difference (#153)
* Fix error in dataset comparison.
* Fix dataset/variable difference for no-data variables.
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  For more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent fbd2e93 commit cbc425d

File tree

2 files changed

+70
-2
lines changed

2 files changed

+70
-2
lines changed

lib/ncdata/utils/_compare_nc_datasets.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,25 @@ def variable_differences(
387387
], # for some reason, this doesn't always list consistently
388388
)
389389

390+
# shapes
391+
def safe_varshape(var):
    """Return the shape of a variable, or None when it has no data.

    Accepts either an NcVariable or a netCDF4.Variable.  An NcVariable
    whose ``data`` is None has no array to take a shape from, so None is
    returned for it instead.
    """
    if not _isncdata(var):
        # netCDF4.Variable passed : it carries its own shape.
        return var.shape

    # NcVariable passed : the shape belongs to its data array, which is
    # allowed to be empty (None).
    return None if var.data is None else var.data.shape
403+
404+
shape, shape2 = [safe_varshape(v) for v in (v1, v2)]
405+
if shape != shape2:
406+
msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}"
407+
errs.append(msg)
408+
390409
# dtypes
391410
dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)]
392411
if dtype != dtype2:
@@ -403,8 +422,12 @@ def _is_strtype(dt):
403422

404423
is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
405424
# TODO: is this correct check to allow compare between different dtypes?
406-
if check_var_data and dims == dims2 and is_str == is_str2:
407-
# N.B. don't check shapes here: we already checked dimensions.
425+
if (
426+
check_var_data
427+
and dims == dims2
428+
and shape == shape2
429+
and is_str == is_str2
430+
):
408431
# NOTE: no attempt to use laziness here. Could be improved.
409432
def getdata(var):
410433
if _isncdata(var):

tests/unit/utils/compare_nc_datasets/test_variable_differences.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,51 @@ def test_signed_unsigned(self, equaldata):
162162
)
163163
check(errs, expected)
164164

165+
@pytest.mark.parametrize("given", ["nodata", "data", "dtype"])
def test_nodata_nodtype(self, given):
    """Compare a variable with NO data or dtype against one that may have either.

    The ``given`` parameter selects what the second variable is created with:
    nothing ("nodata"), a data array ("data"), or only a dtype ("dtype").

    N.B. comparing two variables which both have only a dtype is not covered
    here : see ``test_nodata_withdtype``.
    """
    v1 = NcVariable("x")

    kwargs = {}
    if given == "data":
        # Pin the integer width explicitly.  A plain [1, 2] list would take
        # numpy's default integer type, which is platform-dependent (e.g.
        # int32 on Windows with numpy < 2), making the expected 'int64'
        # message below fail there.
        kwargs["data"] = np.array([1, 2], dtype=np.int64)
        expected = [
            'Variable "x" shapes differ : None != (2,)',
            "Variable \"x\" datatypes differ : None != dtype('int64')",
        ]
    elif given == "dtype":
        kwargs["dtype"] = np.float32
        expected = [
            "Variable \"x\" datatypes differ : None != dtype('float32')"
        ]
    elif given == "nodata":
        expected = []
    else:
        # Guard against an unhandled value being added to the parametrize list.
        raise ValueError(f"unrecognised 'given' param : {given!s}")

    v2 = NcVariable("x", **kwargs)
    errs = variable_differences(v1, v2)
    check(errs, expected)
192+
193+
@pytest.mark.parametrize("equality", ["same", "different"])
def test_nodata_withdtype(self, equality):
    """Compare two variables which each have a dtype but no data.

    N.B. the other data/dtype combinations are all covered in the
    "nodata_nodtype" test.
    """
    dtypes_differ = equality == "different"

    # Work out both dtypes, and the matching expectation, up front.
    first_dtype = np.int16
    second_dtype = np.float16 if dtypes_differ else first_dtype
    if dtypes_differ:
        expected = [
            "Variable \"x\" datatypes differ : dtype('int16') != dtype('float16')"
        ]
    else:
        expected = []

    v1 = NcVariable("x", dtype=first_dtype)
    v2 = NcVariable("x", dtype=second_dtype)
    errs = variable_differences(v1, v2)
    check(errs, expected)
209+
165210

166211
class TestDataCheck__controls:
167212
# Note: testing variable comparison via the 'main' public API instead of

0 commit comments

Comments
 (0)