diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py
index b9892a3..f70003f 100644
--- a/lib/ncdata/utils/_compare_nc_datasets.py
+++ b/lib/ncdata/utils/_compare_nc_datasets.py
@@ -387,6 +387,26 @@ def variable_differences(
         ],  # for some reason, this doesn't always list consistently
     )
 
+    # shapes
+    def safe_varshape(var):
+        """Return the data shape of 'var', or None for an NcVariable with no data."""
+        if _isncdata(var):
+            # NcVariable passed
+            if var.data is None:
+                # Allow for NcVariable.data to be empty
+                shape = None
+            else:
+                shape = var.data.shape
+        else:
+            # netCDF4.Variable passed
+            shape = var.shape
+        return shape
+
+    shape, shape2 = [safe_varshape(v) for v in (v1, v2)]
+    if shape != shape2:
+        msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}"
+        errs.append(msg)
+
     # dtypes
     dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)]
     if dtype != dtype2:
@@ -403,8 +423,12 @@ def _is_strtype(dt):
 
     is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
     # TODO: is this correct check to allow compare between different dtypes?
-    if check_var_data and dims == dims2 and is_str == is_str2:
-        # N.B. don't check shapes here: we already checked dimensions.
+    if (
+        check_var_data
+        and dims == dims2
+        and shape == shape2
+        and is_str == is_str2
+    ):
         # NOTE: no attempt to use laziness here. Could be improved.
         def getdata(var):
             if _isncdata(var):
diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
index ddb6011..f0f553c 100644
--- a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
+++ b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
@@ -162,6 +162,52 @@ def test_signed_unsigned(self, equaldata):
         )
         check(errs, expected)
 
+    @pytest.mark.parametrize("given", ["nodata", "data", "dtype"])
+    def test_nodata_nodtype(self, given):
+        # Check that we can correctly compare a variable with NO specified data or dtype,
+        # with one that may have either.
+        # N.B. this omits comparing 2 variables with dtype only. See following.
+        v1 = NcVariable("x")
+
+        kwargs = {}
+        if given == "data":
+            # Explicit dtype, as the default integer type is platform-dependent.
+            kwargs["data"] = np.array([1, 2], dtype=np.int64)
+            expected = [
+                'Variable "x" shapes differ : None != (2,)',
+                "Variable \"x\" datatypes differ : None != dtype('int64')",
+            ]
+        elif given == "dtype":
+            kwargs["dtype"] = np.float32
+            expected = [
+                "Variable \"x\" datatypes differ : None != dtype('float32')"
+            ]
+        elif given == "nodata":
+            expected = []
+        else:
+            raise ValueError(f"unrecognised 'given' param : {given!s}")
+
+        v2 = NcVariable("x", **kwargs)
+        errs = variable_differences(v1, v2)
+        check(errs, expected)
+
+    @pytest.mark.parametrize("equality", ["same", "different"])
+    def test_nodata_withdtype(self, equality):
+        # Check that we can correctly compare variables which have dtype but no data.
+        # N.B. the other possibilities are all covered in the "nodata_nodtype" test.
+        dtype = np.int16
+        v1 = NcVariable("x", dtype=dtype)
+        expected = []
+        if equality == "different":
+            dtype = np.float16
+            expected = [
+                "Variable \"x\" datatypes differ : dtype('int16') != dtype('float16')"
+            ]
+
+        v2 = NcVariable("x", dtype=dtype)
+        errs = variable_differences(v1, v2)
+        check(errs, expected)
+
 
 class TestDataCheck__controls:
     # Note: testing variable comparison via the 'main' public API instead of