pp-mo · pp-mo · Sep 2, 2025 · Mar 6, 2025 · Apr 9, 2025 · Sep 2, 2025
diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py
@@ -387,6 +387,25 @@ def variable_differences(
         ],  # for some reason, this doesn't always list consistently
     )
 
+    # shapes
+    def safe_varshape(var):
+        """Return the shape of a variable, or None if it has no data.
+
+        Handles both an NcVariable (whose shape is that of its ``.data``,
+        which may be None) and a netCDF4.Variable (which has ``.shape``).
+        """
+        if not _isncdata(var):
+            return var.shape
+        content = var.data
+        if content is None:
+            return None
+        return content.shape
+
+    shape, shape2 = [safe_varshape(v) for v in (v1, v2)]
+    if shape != shape2:
+        msg = f"{var_id_string} shapes differ : {shape!r} != {shape2!r}"
+        errs.append(msg)
+
     # dtypes
     dtype, dtype2 = [v.dtype if _isncdata(v) else v.datatype for v in (v1, v2)]
     if dtype != dtype2:
@@ -403,8 +422,12 @@ def _is_strtype(dt):
 
     is_str, is_str2 = (_is_strtype(dt) for dt in (dtype, dtype2))
     # TODO: is this correct check to allow compare between different dtypes?
-    if check_var_data and dims == dims2 and is_str == is_str2:
-        # N.B. don't check shapes here: we already checked dimensions.
+    if (
+        check_var_data
+        and dims == dims2
+        and shape == shape2
+        and is_str == is_str2
+    ):
         # NOTE: no attempt to use laziness here.  Could be improved.
         def getdata(var):
             if _isncdata(var):

diff --git a/tests/unit/utils/compare_nc_datasets/test_variable_differences.py b/tests/unit/utils/compare_nc_datasets/test_variable_differences.py
@@ -162,6 +162,51 @@ def test_signed_unsigned(self, equaldata):
             )
         check(errs, expected)
 
+    @pytest.mark.parametrize("given", ["nodata", "data", "dtype"])
+    def test_nodata_nodtype(self, given):
+        # Compare a variable with NO data or dtype against one that has neither,
+        # data only, or dtype only.  Two dtype-only variables: see the next test.
+        # N.B. "data" pins int64 explicitly : the default int is platform-dependent.
+        v1 = NcVariable("x")
+
+        kwargs = {}
+        if given == "data":
+            kwargs["data"] = np.array([1, 2], dtype=np.int64)
+            expected = [
+                'Variable "x" shapes differ : None != (2,)',
+                "Variable \"x\" datatypes differ : None != dtype('int64')",
+            ]
+        elif given == "dtype":
+            kwargs["dtype"] = np.float32
+            expected = [
+                "Variable \"x\" datatypes differ : None != dtype('float32')"
+            ]
+        elif given == "nodata":
+            expected = []
+        else:
+            raise ValueError(f"unrecognised 'given' param : {given!s}")
+
+        v2 = NcVariable("x", **kwargs)
+        errs = variable_differences(v1, v2)
+        check(errs, expected)
+
+    @pytest.mark.parametrize("equality", ["same", "different"])
+    def test_nodata_withdtype(self, equality):
+        # Both variables have a dtype but no data : only a dtype mismatch is reported.
+        # N.B. all other data/dtype combinations are covered by "test_nodata_nodtype".
+        same = equality != "different"
+        dtype1 = np.int16
+        dtype2 = dtype1 if same else np.float16
+        mismatch_msg = (
+            "Variable \"x\" datatypes differ : dtype('int16') != dtype('float16')"
+        )
+        expected = [] if same else [mismatch_msg]
+
+        first = NcVariable("x", dtype=dtype1)
+        second = NcVariable("x", dtype=dtype2)
+        errs = variable_differences(first, second)
+        check(errs, expected)
+
 
 class TestDataCheck__controls:
     # Note: testing variable comparison via the 'main' public API instead of