Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,8 @@ cubes = to_iris(ncdata)
enable_lockshare(xarray=True)
dataset = xr.open_dataset("file1.nc")
xr_ncdata = from_xarray(dataset)
xr_ncdata.dimensions.rename("dim0", "newdim")
# N.B. must also replace the name in dimension-lists of variables
for var in xr_ncdata.variables.values():
var.dimensions = ["newdim" if dim == "dim0" else dim for dim in var.dimensions]
from ncdata.utils import rename_dimension
rename_dimension(xr_ncdata, "dim0", "newdim")
to_nc4(ncdata, "file_2a.nc")
```

Expand Down
3 changes: 3 additions & 0 deletions docs/changelog_fragments/87.feat.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added the :func:`~ncdata.utils.rename_dimension` utility.
This provides a "safe" dimension rename, which also replaces
the name in all variables which use it.
2 changes: 2 additions & 0 deletions docs/userdocs/getting_started/introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,8 @@ There is also a 'rename' method of variables/attributes/groups:
Renaming a :class:`~ncdata.NcDimension` within a :class:`~ncdata.NcData`
does *not* adjust the variables which reference it, since a variable's
:attr:`~ncdata.NcVariable.dimensions` is a simple list of names.
However, the :func:`~ncdata.utils.rename_dimension` utility renames a dimension
*and* updates the variables which reference it.
See :ref:`howto_rename_dimension`, and also :func:`ncdata.utils.save_errors`.


Expand Down
3 changes: 2 additions & 1 deletion docs/userdocs/user_guide/common_operations.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,8 @@ Example :
.. warning::
Renaming a dimension will not rename references to it (i.e. in variables), which
obviously may cause problems.
We may add a utility to do this safely in future.
The utility function :func:`~ncdata.utils.rename_dimension` is provided for this.
See : :ref:`howto_rename_dimension`.

Copying
-------
Expand Down
48 changes: 39 additions & 9 deletions docs/userdocs/user_guide/howtos.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,17 +126,17 @@ Note that this affects both the element's container key *and* its ``.name``.

.. Warning::

Renaming a **dimension** can cause problems, so must be done with care.
See :ref:`howto_rename_dimension`.

.. Warning::

**Why Not Just...** ``dim = data.dimensions['x']; dim.name = "q"`` ?
**Why Not Just...** ``var = data.variables['x']; var.name = "q"`` ?

This would break the expected ``key == elements[key].name`` rule.
We don't prevent this, but it is usually a mistake.
:func:`~ncdata.utils.save_errors` detects this type of problem.

.. Warning::

Renaming a **dimension** can cause particular problems, so must be done with care.
See :ref:`howto_rename_dimension`.


.. _howto_rename_dimension:

Expand All @@ -145,10 +145,40 @@ Rename a dimension
Simply using ``ncdata.dimensions.rename()`` can cause problems, because you must then
**also** replace the name where it occurs in the dimensions of any variables.

.. Note::
Instead, you should use the :func:`~ncdata.utils.rename_dimension` function, which does
this correctly.

For example:

.. doctest:: python

>>> from ncdata.utils import rename_dimension
>>> ncdata = NcData(
... dimensions=[NcDimension("x", 3), NcDimension("y", 4)],
... variables=[NcVariable("vy", ["y"]), NcVariable("vzyx", ["z", "y", "x"])]
... )
>>> print(ncdata)
<NcData: <'no-name'>
dimensions:
x = 3
y = 4
<BLANKLINE>
variables:
<NcVariable(<no-dtype>): vy(y)>
<NcVariable(<no-dtype>): vzyx(z, y, x)>
>

**To-Do** : there should be a utility for this, but as yet it does not exist.
See `Issue#87 <https://github.com/pp-mo/ncdata/issues/87>`_.
>>> rename_dimension(ncdata, "y", "qqq")
>>> print(ncdata)
<NcData: <'no-name'>
dimensions:
x = 3
qqq = 4
<BLANKLINE>
variables:
<NcVariable(<no-dtype>): vy(qqq)>
<NcVariable(<no-dtype>): vzyx(z, qqq, x)>
>


.. _howto_read_attr:
Expand Down
2 changes: 2 additions & 0 deletions lib/ncdata/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

from ._compare_nc_datasets import dataset_differences, variable_differences
from ._copy import ncdata_copy
from ._rename_dim import rename_dimension
from ._save_errors import save_errors

__all__ = [
"dataset_differences",
"ncdata_copy",
"rename_dimension",
"save_errors",
"variable_differences",
]
66 changes: 66 additions & 0 deletions lib/ncdata/utils/_rename_dim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Utility to rename dimensions."""

from ncdata import NcData


def _rename_dims_in_vars(ncdata: NcData, name_from: str, name_to: str) -> None:
    """
    Replace a dimension name in every variable which references it.

    This applies to the variables of ``ncdata`` itself and, recursively, to those
    of its sub-groups -- except for any group which defines its own ``name_from``
    dimension.
    """
    for variable in ncdata.variables.values():
        if name_from not in variable.dimensions:
            continue
        variable.dimensions = tuple(
            name_to if dim_name == name_from else dim_name
            for dim_name in variable.dimensions
        )

    # A group with its own 'name_from' dimension masks the outer one (a "scope
    # hole"), so neither that group nor anything beneath it is visited.
    for group in ncdata.groups.values():
        if name_from in group.dimensions:
            continue
        _rename_dims_in_vars(group, name_from, name_to)


def rename_dimension(ncdata: NcData, name_from: str, name_to: str) -> None:
    """
    Rename a dimension of an :class:`~ncdata.NcData`.

    As well as calling ``ncdata.dimensions.rename``, this *also* replaces the name
    in all the variables which reference it, including those in sub-groups.

    Parameters
    ----------
    ncdata : NcData
        data with a top-level dimension to rename.

    name_from: str
        existing name of dimension to rename.

    name_to: str
        new name of dimension.

    Raises
    ------
    ValueError
        if a dimension called ``name_to`` already exists, either in ``ncdata``
        itself or in any of its sub-groups.

    Notes
    -----
    * The operation is in-place.  To produce a *new* :class:`~ncdata.NcData` with the
      renamed dimension, create a copy first with :meth:`~ncdata.NcData.copy`.

    * Unlike a simple :meth:`~ncdata.NameMap.rename`, this checks whether a dimension
      of the new name already exists, and if so raises an error.

    """

    def walk_groups(group, group_path=""):
        # Yield (group, path) pairs depth-first : each group before its sub-groups.
        yield group, group_path
        for subgroup in group.groups.values():
            yield from walk_groups(subgroup, group_path + "/" + subgroup.name)

    # Refuse the rename if the new name is already in use, at any level.
    for group, group_path in walk_groups(ncdata):
        if name_to not in group.dimensions:
            continue
        location = f' in group "{group_path}"' if group_path else ""
        raise ValueError(
            f"Cannot rename dimension {name_from!r} to {name_to!r}, "
            f"because a {name_to!r} dimension already exists{location}."
        )

    # Rename the dimension itself, and then all the references to it.
    ncdata.dimensions.rename(name_from, name_to)
    _rename_dims_in_vars(ncdata, name_from, name_to)
2 changes: 1 addition & 1 deletion tests/unit/utils/test_ncdata_copy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Tests for class :class:`ncdata.utils.ncdata_copy`.
"""Tests for :func:`ncdata.utils.ncdata_copy`.

This is the generic utility-function version of the copy operation.
"""
Expand Down
150 changes: 150 additions & 0 deletions tests/unit/utils/test_rename_dimension.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""Tests for :func:`ncdata.utils.rename_dimension`."""

import re

import numpy as np
import pytest
from ncdata import NcData, NcDimension, NcVariable
from ncdata.utils import rename_dimension, save_errors


def make_saveable(
    ncdata: NcData, _outer_dims: dict[str, NcDimension] | None = None
):
    """Fill in missing dimensions and data, so that a sample NcData can be saved.

    Any dimension which a variable references, but which is not defined either
    here or in an enclosing group, is created with length 2.
    Any variable with no data gets an array of zeros of the appropriate shape.
    Sub-groups are processed recursively.

    The ``_outer_dims`` argument carries the dimensions of the enclosing groups
    into the recursive calls : it is not intended to be passed by a caller.

    N.B. this might actually be a useful utility one day??
    """
    inherited = {} if _outer_dims is None else _outer_dims
    inherited_names = {dim.name for dim in inherited.values()}

    def find_dim(dimname):
        """Look up a dimension by name : own ones mask inherited ones."""
        own_dims = ncdata.dimensions
        # A missing name is an error here : it must be in one or the other.
        return own_dims[dimname] if dimname in own_dims else inherited[dimname]

    for var in ncdata.variables.values():
        # Create any dimension which is referenced, but not visible from here.
        # The own dimensions are re-checked each time, as we may have just added one.
        for dimname in var.dimensions:
            is_known = (
                dimname in inherited_names or dimname in ncdata.dimensions
            )
            if not is_known:
                ncdata.dimensions.add(NcDimension(dimname, 2))

        # Give the variable some data, if it has none.
        if var.data is None:
            var.data = np.zeros(
                tuple(find_dim(dimname).size for dimname in var.dimensions)
            )

    # Sub-groups see the inherited dimensions, overlaid with the ones defined here.
    visible_dims = inherited.copy()
    visible_dims.update(ncdata.dimensions)
    for group in ncdata.groups.values():
        make_saveable(group, _outer_dims=visible_dims)


class TestRenameDimension:
    """Tests for :func:`ncdata.utils.rename_dimension`."""

    @pytest.fixture(autouse=True)
    def setup(self):
        """Create a simple test dataset : two dimensions, and three variables."""
        self.ncdata = NcData(
            dimensions=[
                NcDimension("y", 2),
                NcDimension("x", 3),
            ],
            variables=[
                NcVariable("vx", ["x"], data=[0, 1, 2]),
                NcVariable("vy", ["y"], data=[11, 12]),
                NcVariable("vyx", ["y", "x"], data=np.zeros((2, 3))),
            ],
        )

    def test_basic(self):
        """Check renaming of the dimension, and of the references in variables."""
        ncdata = self.ncdata
        xdim = ncdata.dimensions["x"]
        rename_dimension(ncdata, "x", "zz")
        # The dimension object itself is retained, under the new name.
        assert ncdata.dimensions["zz"] is xdim
        assert ncdata.variables["vx"].dimensions == ("zz",)
        assert ncdata.variables["vy"].dimensions == ("y",)
        assert ncdata.variables["vyx"].dimensions == ("y", "zz")
        # Check that the result is still save-able.
        assert save_errors(ncdata) == []

    def test_name_collision_fail(self):
        """Check the error when the new name is an existing top-level dimension."""
        ncdata = self.ncdata
        msg = "Cannot rename dimension 'x' to 'y', because a 'y' dimension already exists."
        # N.B. "match" is a regular expression : escape it, to test the literal text.
        with pytest.raises(ValueError, match=re.escape(msg)):
            rename_dimension(ncdata, "x", "y")

    @pytest.mark.parametrize(
        "innergroup", [False, True], ids=["maingroup", "innergroup"]
    )
    def test_name_collision_ingroup_fail(self, innergroup):
        """Check the error when the new name is a dimension within a sub-group."""
        ncdata = self.ncdata
        grp = NcData(name="inner", dimensions=[NcDimension("z", 2)])
        msg = "Cannot rename dimension 'x' to 'z', because a 'z' dimension already exists"
        if innergroup:
            # Nest the group with the colliding dimension one level deeper.
            grp = NcData(name="main", groups=[grp])
            msg += ' in group "/main/inner".'
        else:
            msg += ' in group "/inner".'
        ncdata.groups.add(grp)
        # N.B. "match" is a regular expression : escape it, to test the literal text.
        with pytest.raises(ValueError, match=re.escape(msg)):
            rename_dimension(ncdata, "x", "z")

    @pytest.fixture()
    def group_example(self, setup):
        """Make a copy of the basic test data, with two sub-groups added.

        Group "a" only *references* the outer "x" dimension, whereas group "b"
        defines an "x" dimension of its own.
        """
        ncdata = self.ncdata.copy()
        ncdata.groups.addall(
            [
                NcData(
                    "a",
                    variables=[
                        NcVariable("ax", ["x"]),
                        NcVariable("aqxr", ["q", "x", "r"]),
                    ],
                ),
                NcData(
                    "b",
                    dimensions=[NcDimension("x", 20)],
                    variables=[NcVariable("bx", ["x"])],
                ),
            ]
        )
        # There is no teardown, so a plain return is all that is needed.
        return ncdata

    def test_groups(self, group_example):
        """Check which variables within sub-groups are renamed."""
        ncdata = group_example
        rename_dimension(ncdata, "x", "zz")
        assert ncdata.groups["a"].variables["ax"].dimensions == ("zz",)
        assert ncdata.groups["a"].variables["aqxr"].dimensions == (
            "q",
            "zz",
            "r",
        )
        # This one doesn't get renamed: it is in a "scope hole" because the group
        # defines its own "x" dimension, which takes precedence.
        assert ncdata.groups["b"].variables["bx"].dimensions == ("x",)

    def test_saveable(self, group_example):
        """Check that a save-able dataset with groups is still save-able when renamed."""
        # Construct a complex example, make it saveable, and check that a renamed
        # version is still saveable.
        ncdata = group_example.copy()

        make_saveable(ncdata)
        assert save_errors(ncdata) == []

        # now rename and try again.
        rename_dimension(ncdata, "x", "zz")
        assert save_errors(ncdata) == []
Loading