diff --git a/README.md b/README.md index 8faa2736..fd8c8598 100644 --- a/README.md +++ b/README.md @@ -79,10 +79,8 @@ cubes = to_iris(ncdata) enable_lockshare(xarray=True) dataset = xr.open_dataset("file1.nc") xr_ncdata = from_xarray(dataset) -xr_ncdata.dimensions.rename("dim0", "newdim") -# N.B. must also replace the name in dimension-lists of variables -for var in xr_ncdata.variables.values(): - var.dimensions = ["newdim" if dim == "dim0" else dim for dim in var.dimensions] +from ncdata.utils import rename_dimension +rename_dimension(xr_ncdata, "dim0", "newdim") to_nc4(ncdata, "file_2a.nc") ``` diff --git a/docs/changelog_fragments/123.feat.rst b/docs/changelog_fragments/123.dev.rst similarity index 100% rename from docs/changelog_fragments/123.feat.rst rename to docs/changelog_fragments/123.dev.rst diff --git a/docs/changelog_fragments/87.feat.rst b/docs/changelog_fragments/87.feat.rst new file mode 100644 index 00000000..2d6149cd --- /dev/null +++ b/docs/changelog_fragments/87.feat.rst @@ -0,0 +1,3 @@ +Added the :func:`~ncdata.utils.rename_dimension` utility. +This provides a "safe" dimension rename, which also replaces +the name in all variables which use it. diff --git a/docs/userdocs/getting_started/introduction.rst b/docs/userdocs/getting_started/introduction.rst index 24aa5bd3..0a4a51d8 100644 --- a/docs/userdocs/getting_started/introduction.rst +++ b/docs/userdocs/getting_started/introduction.rst @@ -185,6 +185,8 @@ There is also a 'rename' method of variables/attributes/groups: Renaming a :class:`~ncdata.NcDimension` within a :class:`~ncdata.NcData` does *not* adjust the variables which reference it, since a variable's :attr:`~ncdata.NcVariable.dimensions` is a simple list of names. + However, the :func:`~ncdata.utils.rename_dimension` utility renames a dimension + together with the references to it. See :ref:`howto_rename_dimension`, and also :func:`ncdata.utils.save_errors`.
diff --git a/docs/userdocs/user_guide/common_operations.rst b/docs/userdocs/user_guide/common_operations.rst index e128e753..3e484c35 100644 --- a/docs/userdocs/user_guide/common_operations.rst +++ b/docs/userdocs/user_guide/common_operations.rst @@ -70,7 +70,8 @@ Example : .. warning:: Renaming a dimension will not rename references to it (i.e. in variables), which obviously may cause problems. - We may add a utility to do this safely in future. + The utility function :func:`~ncdata.utils.rename_dimension` is provided for this. + See :ref:`howto_rename_dimension`. Copying ------- diff --git a/docs/userdocs/user_guide/howtos.rst b/docs/userdocs/user_guide/howtos.rst index 75f8000f..48e967c0 100644 --- a/docs/userdocs/user_guide/howtos.rst +++ b/docs/userdocs/user_guide/howtos.rst @@ -126,17 +126,17 @@ Note that this affects both the element's container key *and* its ``.name``. .. Warning:: - Renaming a **dimension** can cause problems, so must be done with care. - See :ref:`howto_rename_dimension`. - -.. Warning:: - - **Why Not Just...** ``dim = data.dimensions['x']; dim.name = "q"`` ? + **Why Not Just...** ``var = data.variables['x']; var.name = "q"`` ? This would break the expected ``key == elements[key].name`` rule. We don't prevent this, but it is usually a mistake. :func:`~ncdata.utils.save_errors` detects this type of problem. +.. Warning:: + + Renaming a **dimension** can cause particular problems, so must be done with care. + See :ref:`howto_rename_dimension`. + .. _howto_rename_dimension: @@ -145,10 +145,40 @@ Rename a dimension Simply using ``ncdata.dimensions.rename()`` can cause problems, because you must then **also** replace the name where it occurs in the dimensions of any variables. -.. Note:: +Instead, you should use the :func:`~ncdata.utils.rename_dimension` function, which does +this correctly. + +For example: + +.. doctest:: python + + >>> from ncdata.utils import rename_dimension + >>> ncdata = NcData( + ...
def _rename_dims_in_vars(ncdata: "NcData", name_from: str, name_to: str) -> None:
    """Replace a dimension name in all contained variables which reference it.

    The variables of ``ncdata`` are updated in-place, and then those of its
    sub-groups, recursively.

    A sub-group which defines its own ``name_from`` dimension is skipped, along
    with everything inside it : variables there refer to that inner dimension,
    not to the one being renamed (a "scope hole").
    """
    for var in ncdata.variables.values():
        if name_from in var.dimensions:
            # Generator form : no need to build an intermediate list.
            var.dimensions = tuple(
                name_to if name == name_from else name for name in var.dimensions
            )

    for grp in ncdata.groups.values():
        if name_from not in grp.dimensions:
            _rename_dims_in_vars(grp, name_from, name_to)


def _check_no_collision(
    ncdata: "NcData", name_from: str, name_to: str, group_path: str = ""
) -> None:
    """Raise a ValueError if ``name_to`` is a dimension of ``ncdata`` or any sub-group.

    ``group_path`` is the "/"-separated path of ``ncdata`` below the starting
    point (empty for the starting point itself) : it is used only to say, in the
    error message, where the clashing dimension was found.
    """
    if name_to in ncdata.dimensions:
        inner = f' in group "{group_path}"' if group_path else ""
        msg = (
            f"Cannot rename dimension {name_from!r} to {name_to!r}, "
            f"because a {name_to!r} dimension already exists{inner}."
        )
        raise ValueError(msg)

    for group in ncdata.groups.values():
        _check_no_collision(
            group, name_from, name_to, group_path=f"{group_path}/{group.name}"
        )


def rename_dimension(ncdata: "NcData", name_from: str, name_to: str) -> None:
    """
    Rename a dimension of an :class:`~ncdata.NcData`.

    This function calls ``ncdata.dimensions.rename``, but then it *also* renames the
    dimension in all the variables which reference it, including those in sub-groups.

    Parameters
    ----------
    ncdata : NcData
        data with a top-level dimension to rename.

    name_from: str
        existing name of dimension to rename.

    name_to: str
        new name of dimension.

    Raises
    ------
    ValueError
        if a dimension called ``name_to`` already exists, either in ``ncdata`` itself
        or in any group nested within it.  This is checked first, before anything
        is changed.

    Notes
    -----
    * The operation is in-place.  To produce a *new* :class:`~ncdata.NcData` with the
      renamed dimension, create a copy first with :meth:`~ncdata.NcData.copy`.

    * Unlike a simple :meth:`~ncdata.NameMap.rename`, this checks whether a dimension
      of the new name already exists, and if so raises an error.

    * Variables within a sub-group which defines its own ``name_from`` dimension are
      *not* changed, since they refer to that inner dimension.

    """
    _check_no_collision(ncdata, name_from, name_to)
    ncdata.dimensions.rename(name_from, name_to)
    _rename_dims_in_vars(ncdata, name_from, name_to)
""" diff --git a/tests/unit/utils/test_rename_dimension.py b/tests/unit/utils/test_rename_dimension.py new file mode 100644 index 00000000..feab000e --- /dev/null +++ b/tests/unit/utils/test_rename_dimension.py @@ -0,0 +1,150 @@ +"""Tests for :func:`ncdata.utils.rename_dimension`.""" + +import numpy as np +import pytest +from ncdata import NcData, NcDimension, NcVariable +from ncdata.utils import rename_dimension, save_errors + + +def make_saveable( + ncdata: NcData, _outer_dims: dict[str, NcDimension] | None = None +): + """Add missing dimensions + data, to make a sample NcData save-able. + + Create any missing variable dimensions (length==2). + Add any missing variable data arrays. + + N.B. this might actually be a useful utility one day?? + """ + if _outer_dims is None: + _outer_dims = {} + + outer_dim_names = [dim.name for dim in _outer_dims.values()] + + def getdim(dimname): + """Fetch a known dimension by name. + + Check own, then "outer" ones, to correctly implement *scope masking*. + """ + if dimname in ncdata.dimensions: + # These ones must take precedence! + result = ncdata.dimensions[dimname] + else: + # If not here, it should be in the 'outer' dims (else error). + result = _outer_dims[dimname] + return result + + for var in ncdata.variables.values(): + # Where variables reference dims which don't exist, create them (length=2). + for dimname in var.dimensions: + # Note: list of dims we check is *dynamic* (since we may add them) + if dimname not in outer_dim_names + list(ncdata.dimensions.keys()): + ncdata.dimensions.add(NcDimension(dimname, 2)) + + # Where variables have no data, add some. + if var.data is None: + shape = tuple([getdim(dimname).size for dimname in var.dimensions]) + var.data = np.zeros(shape) + + # recurse through groups. 
+ all_dims = _outer_dims.copy() + all_dims.update(ncdata.dimensions) + for grp in ncdata.groups.values(): + make_saveable(grp, _outer_dims=all_dims) + + +class TestRenameDimension: + """Tests for :func:`ncdata.utils.rename_dimension`.""" + + @pytest.fixture(autouse=True) + def setup(self): + self.ncdata = NcData( + dimensions=[ + NcDimension("y", 2), + NcDimension("x", 3), + ], + variables=[ + NcVariable("vx", ["x"], data=[0, 1, 2]), + NcVariable("vy", ["y"], data=[11, 12]), + NcVariable("vyx", ["y", "x"], data=np.zeros((2, 3))), + ], + ) + + def test_basic(self): + ncdata = self.ncdata + xdim = ncdata.dimensions["x"] + rename_dimension(ncdata, "x", "zz") + assert ncdata.dimensions["zz"] is xdim + assert ncdata.variables["vx"].dimensions == ("zz",) + assert ncdata.variables["vy"].dimensions == ("y",) + assert ncdata.variables["vyx"].dimensions == ("y", "zz") + # Check that the result is still save-able. + assert save_errors(ncdata) == [] + + def test_name_collision_fail(self): + ncdata = self.ncdata + msg = "Cannot rename dimension 'x' to 'y', because a 'y' dimension already exists." + with pytest.raises(ValueError, match=msg): + rename_dimension(ncdata, "x", "y") + + @pytest.mark.parametrize( + "innergroup", [False, True], ids=["maingroup", "innergroup"] + ) + def test_name_collision_ingroup_fail(self, innergroup): + ncdata = self.ncdata + grp = NcData(name="inner", dimensions=[NcDimension("z", 2)]) + msg = "Cannot rename dimension 'x' to 'z', because a 'z' dimension already exists" + if innergroup: + grp = NcData(name="main", groups=[grp]) + msg += ' in group "/main/inner".' + else: + msg += ' in group "/inner".' 
+ ncdata.groups.add(grp) + with pytest.raises(ValueError, match=msg): + rename_dimension(ncdata, "x", "z") + + @pytest.fixture() + def group_example(self, setup): + ncdata = self.ncdata.copy() + ncdata.groups.addall( + [ + NcData( + "a", + variables=[ + NcVariable("ax", ["x"]), + NcVariable("aqxr", ["q", "x", "r"]), + ], + ), + NcData( + "b", + dimensions=[NcDimension("x", 20)], + variables=[NcVariable("bx", ["x"])], + ), + ] + ) + yield ncdata + + def test_groups(self, group_example): + ncdata = group_example + rename_dimension(ncdata, "x", "zz") + assert ncdata.groups["a"].variables["ax"].dimensions == ("zz",) + assert ncdata.groups["a"].variables["aqxr"].dimensions == ( + "q", + "zz", + "r", + ) + # This one doesn't get renamed: it is in a "scope hole" because the group + # defines its own "x" dimension, which takes precedence. + assert ncdata.groups["b"].variables["bx"].dimensions == ("x",) + + def test_saveable(self, group_example): + # Construct a complex example, make it saveable, and check that a renamed + # version is still saveable. + ncdata = group_example.copy() + + make_saveable(ncdata) + assert save_errors(ncdata) == [] + + # now rename and try again. + rename_dimension(ncdata, "x", "zz") + assert save_errors(ncdata) == []