From 29bc95145d01230bb8de2422c1dee7379ae1e318 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Wed, 3 Sep 2025 20:39:03 +0100 Subject: [PATCH 1/7] Generalise testing + extend to Slicer tests. --- tests/unit/utils/dim_indexing/test_index_by_dimensions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py index a0c2200..ea536a5 100644 --- a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py +++ b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py @@ -4,7 +4,7 @@ """ import numpy as np -import pytest + from ncdata.utils import dataset_differences, index_by_dimensions from . import ( # noqa: F401 From c30eec6c267d7dd1387f49c44afc3d0f216e2a1b Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 7 Sep 2025 12:30:44 +0100 Subject: [PATCH 2/7] Multiple docs improvements: fix warnings + increase cross-referencing. --- docs/conf.py | 8 +++- docs/details/developer_notes.rst | 7 ++- .../userdocs/user_guide/common_operations.rst | 2 + docs/userdocs/user_guide/utilities.rst | 32 ++++++++----- lib/ncdata/utils/__init__.py | 10 ++--- lib/ncdata/utils/_compare_nc_datasets.py | 11 +++++ lib/ncdata/utils/_copy.py | 23 ++++++++++ lib/ncdata/utils/_rename_dim.py | 2 + lib/ncdata/utils/_save_errors.py | 45 +++++++++++-------- 9 files changed, 101 insertions(+), 39 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index d9f62c1..32f3f87 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -93,7 +93,13 @@ # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +exclude_patterns = [ + "_build", + "Thumbs.db", + ".DS_Store", + "changelog_fragments", + "details/api/modules.rst", +] # -- Options for HTML output ------------------------------------------------- diff --git a/docs/details/developer_notes.rst b/docs/details/developer_notes.rst index ee27069..ac6667f 100644 --- a/docs/details/developer_notes.rst +++ b/docs/details/developer_notes.rst @@ -36,7 +36,7 @@ For a full docs-build: * the above is just for *local testing*, if required. * For PRs (and releases), we also provide *automatic* builds on GitHub, - via `ReadTheDocs `_ + via ReadTheDocs_. Release actions @@ -52,7 +52,7 @@ Release actions #. Cut a release on GitHub - * this triggers a new docs version on `ReadTheDocs `_. + * this triggers a new docs version on ReadTheDocs_. #. Build the distribution @@ -109,3 +109,6 @@ Release actions * wait a few hours.. * check that the new version appears in the output of ``$ conda search ncdata`` + + +.. _ReadTheDocs: https://readthedocs.org/projects/ncdata diff --git a/docs/userdocs/user_guide/common_operations.rst b/docs/userdocs/user_guide/common_operations.rst index 0637a69..ac737f4 100644 --- a/docs/userdocs/user_guide/common_operations.rst +++ b/docs/userdocs/user_guide/common_operations.rst @@ -55,6 +55,8 @@ Example : >>> dataset.variables["x"].avals["units"] = "m s-1" +.. _operations_rename: + Rename ------ A component can be renamed with the :meth:`~ncdata.NameMap.rename` method. This changes diff --git a/docs/userdocs/user_guide/utilities.rst b/docs/userdocs/user_guide/utilities.rst index 859bb25..6d9d85d 100644 --- a/docs/userdocs/user_guide/utilities.rst +++ b/docs/userdocs/user_guide/utilities.rst @@ -9,12 +9,18 @@ Rename Dimensions The :func:`~ncdata.utils.rename_dimension` utility does this, in a way which ensures a safe and consistent result. +See: :ref:`operations_rename` + + .. 
_utils_equality: Dataset Equality Testing ------------------------ -The function :func:`~ncdata.utils.dataset_differences` produces a list of messages -detailing all the ways in which two datasets are different. +The functions :func:`~ncdata.utils.dataset_differences` and +:func:`~ncdata.utils.variable_differences` produce a list of messages detailing all the +ways in which two datasets are different. + +See: :ref:`equality_checks` For Example: ^^^^^^^^^^^^ @@ -48,7 +54,7 @@ For Example: Dataset variable "vx" shapes differ : (5,) != (2,) .. note:: - To compare isolated variables, a subsidiary routine + To compare isolated variables, the subsidiary routine :func:`~ncdata.utils.variable_differences` is also provided. .. note:: @@ -62,9 +68,12 @@ For Example: Sub-indexing ------------ -A new dataset can be derived by indexing over dimensions, analagous to sub-indexing -an array. This operation indexes all the variables appropriately, to produce a new -independent dataset which is complete and self-consistent. +The :func:`~ncdata.utils.index_by_dimensions` function and the +:class:`~ncdata.utils.Slicer` class let you extract a subset of a dataset, by indexing +on the data dimensions. This is analagous to sub-indexing an array. + +This operation indexes all the variables appropriately, to produce a new, independent +dataset which is complete and self-consistent. The basic indexing operation is provided in three forms: @@ -197,6 +206,8 @@ Consistency Checking The :func:`~ncdata.utils.save_errors` function provides a general correctness-and-consistency check. +See: :ref:`correctness-checks` + For example: .. testsetup:: @@ -218,13 +229,10 @@ For example: Variable 'q' has a dtype which cannot be saved to netcdf : dtype('O'). -See : :ref:`correctness-checks` - - Data Copying ------------ -The :func:`~ncdata.utils.ncdata_copy` makes structural copies of datasets. -However, this can be easily be accessed as :meth:`ncdata.NcData.copy`, which is the same -operation. 
+The :func:`~ncdata.utils.ncdata_copy` function makes structural copies of datasets. +However, this can now be more easily accessed as :meth:`ncdata.NcData.copy`, which is +the same operation. See: :ref:`copy_notes` \ No newline at end of file diff --git a/lib/ncdata/utils/__init__.py b/lib/ncdata/utils/__init__.py index 297b3cf..f44fe95 100644 --- a/lib/ncdata/utils/__init__.py +++ b/lib/ncdata/utils/__init__.py @@ -6,12 +6,12 @@ from ._rename_dim import rename_dimension from ._save_errors import save_errors -__all__ = [ - "Slicer", +__all__ = [ # noqa: RUF022 + "rename_dimension", "dataset_differences", + "variable_differences", "index_by_dimensions", - "ncdata_copy", - "rename_dimension", + "Slicer", "save_errors", - "variable_differences", + "ncdata_copy", ] diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index f70003f..9b47244 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -14,6 +14,7 @@ import netCDF4 import netCDF4 as nc import numpy as np + from ncdata import NcData, NcVariable @@ -37,6 +38,8 @@ def dataset_differences( :class:`~ncdata.NcData` objects. File paths are opened with the :mod:`netCDF4` module. + See: :ref:`equality_checks` + Parameters ---------- dataset_or_path_1 : str or Path or netCDF4.Dataset or NcData @@ -93,6 +96,9 @@ def dataset_differences( A list of "error" strings, describing differences between the inputs. If empty, no differences were found. + See Also + -------- + :func:`~ncdata.utils.variable_differences` """ ds1_was_path = not hasattr(dataset_or_path_1, "variables") ds2_was_path = not hasattr(dataset_or_path_2, "variables") @@ -322,6 +328,8 @@ def variable_differences( r""" Compare variables. + See: :ref:`equality_checks` + Parameters ---------- v1, v2 : NcVariable @@ -347,6 +355,9 @@ def variable_differences( A list of "error" strings, describing differences between the inputs. If empty, no differences were found. 
+ See Also + -------- + :func:`~ncdata.utils.dataset_differences` """ errs = [] diff --git a/lib/ncdata/utils/_copy.py b/lib/ncdata/utils/_copy.py index f631df4..e68ee69 100644 --- a/lib/ncdata/utils/_copy.py +++ b/lib/ncdata/utils/_copy.py @@ -17,6 +17,8 @@ def ncdata_copy(ncdata: NcData) -> NcData: The operation makes fresh copies of all ncdata objects, but does not copy variable data arrays. + See: :ref:`copy_notes` + Parameters ---------- ncdata @@ -27,6 +29,27 @@ def ncdata_copy(ncdata: NcData) -> NcData: ncdata identical but distinct copy of input + Notes + ----- + This operation is now also available as an object method: + :meth:`~ncdata.NcData.copy`. For example: + + Syntactically, this is generally more convenient, but the operation is identical. + For example: + + .. testsetup:: + + >>> from ncdata import NcData + >>> from ncdata.utils import dataset_differences, ncdata_copy + >>> data = NcData() + + .. doctest:: + + >>> data1 = ncdata_copy(data) + >>> data2 = data.copy() + >>> dataset_differences(data1, data2) == [] + True + """ return NcData( name=ncdata.name, diff --git a/lib/ncdata/utils/_rename_dim.py b/lib/ncdata/utils/_rename_dim.py index 8043b22..8f86d6f 100644 --- a/lib/ncdata/utils/_rename_dim.py +++ b/lib/ncdata/utils/_rename_dim.py @@ -46,6 +46,8 @@ def rename_dimension(ncdata: NcData, name_from: str, name_to: str) -> None: This function calls ``ncdata.dimensions.rename``, but then it *also* renames the dimension in all the variables which reference it, including those in sub-groups. 
+ See: :ref:`operations_rename` + Parameters ---------- ncdata : NcData diff --git a/lib/ncdata/utils/_save_errors.py b/lib/ncdata/utils/_save_errors.py index b6b0713..e542281 100644 --- a/lib/ncdata/utils/_save_errors.py +++ b/lib/ncdata/utils/_save_errors.py @@ -4,6 +4,7 @@ import netCDF4 as nc import numpy as np + from ncdata import NcData, NcVariable @@ -180,29 +181,15 @@ def _save_errors_inner( def save_errors(ncdata: NcData) -> List[str]: """ - Scan a dataset for it's consistency and completeness. + Scan a dataset for its consistency and completeness. + + See: :ref:`correctness-checks` - Reports on anything that will make this fail to save. + Describe any aspects of this dataset which would prevent it from saving (cause an + error). If there are any such problems, then an attempt to save the ncdata to a netcdf file will fail. If there are none, then a save should succeed. - The checks made are roughly the following - - (1) check names in all components (dimensions, variables, attributes and groups): - - * all names are valid netcdf names - * all element names match their key in the component, - i.e. "component[key].name == key" - - (2) check that all attribute values have netcdf-compatible dtypes. - (E.G. no object or compound (recarray) dtypes). - - (3) check that, for all contained variables : - - * it's dimensions are all present in the enclosing dataset - * it has an attached data array, of a netcdf-compatible dtype - * the shape of it's data matches the lengths of it's dimensions - Parameters ---------- ncdata @@ -213,5 +200,25 @@ def save_errors(ncdata: NcData) -> List[str]: errors A list of strings, error messages describing problems with the dataset. If no errors, returns an empty list. + + Notes + ----- + The checks made are roughly the following: + + **(1)** check names in all components (dimensions, variables, attributes and groups): + + * all names are valid netcdf names + * all element names match their key in the component, + i.e. 
``component[key].name == key`` + + **(2)** check that all attribute values have netcdf-compatible dtypes. + + * ( E.G. no object or compound (recarray) dtypes ) + + **(3)** check that, for all contained variables: + + * its dimensions are all present in the enclosing dataset + * it has an attached data array, of a netcdf-compatible dtype + * the shape of its data matches the lengths of its dimensions """ return _save_errors_inner(ncdata) From 7b50fbc2caaa7bceeaef5f6beef940ed0581bd60 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Sun, 7 Sep 2025 19:08:52 +0100 Subject: [PATCH 3/7] Improve docs-build account in developer notes. --- docs/details/developer_notes.rst | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/docs/details/developer_notes.rst b/docs/details/developer_notes.rst index ac6667f..c962019 100644 --- a/docs/details/developer_notes.rst +++ b/docs/details/developer_notes.rst @@ -11,12 +11,21 @@ A new change-note fragment file should be included in each PR, but is normally c with a ``towncrier`` command-line command: * shortly, with ``towncrier create --content "mynotes..." ..rst`` -* ... or for longer forms, use ``towncrier create --edit``. -* Here, "" is one of feat/doc/bug/dev/misc. Which are: user features; - bug fixes; documentation changes; general developer-relevant changes; - or "miscellaneous". + + ... or, for longer content, use ``towncrier create --edit``. + +* Here, "" is one of: + + * "feat": user features + * "doc": documentation changes + * "bug": bug fixes + * "dev": general developer-relevant changes + * "misc": miscellaneous + (For reference, these categories are configured in ``pyproject.toml``). + * the fragment files are stored in ``docs/changelog_fragments``. + * N.B. for this to work well, every change should be identified with a matching github issue. If there are multiple associated PRs, they should all be linked to the issue. 
@@ -26,15 +35,18 @@ Documentation build For a full docs-build: -* a simple ``$ make html`` will do for now +* The most useful way is simply ``$ cd docs`` and ``$ make html-keeplog``. + * Note: the plainer ``$ make html`` is the same, but "-keeplog", in addition, preserves the + changelog fragments **and** reverts the change_log.rst after the html build: + This stops you accidentally including a "built" changelog when making further commits. * The ``docs/Makefile`` wipes the API docs and invokes sphinx-apidoc for a full rebuild * It also calls towncrier to clear out the changelog fragments + update ``docs/change_log.rst``. - This should be reverted before pushing your PR -- i.e. leave changenotes in the fragments. -* the results is then available at ``docs/_build/html/index.html``. +* ( *assuming "-keeplog"*: fragments and change_log.rst are then reverted, undoing the towncrier build ). +* the result is then available at ``docs/_build/html/index.html``. .. note:: - * the above is just for *local testing*, if required. + * the above is just for *local testing*, when required. * For PRs (and releases), we also provide *automatic* builds on GitHub, via ReadTheDocs_. From 3d6430a45d64cfab0c47359f2783109e912bd4f5 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 2 Oct 2025 17:19:55 +0100 Subject: [PATCH 4/7] Small fixes. --- docs/userdocs/user_guide/utilities.rst | 5 ++--- lib/ncdata/utils/_save_errors.py | 2 +- tests/unit/utils/dim_indexing/test_index_by_dimensions.py | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/userdocs/user_guide/utilities.rst b/docs/userdocs/user_guide/utilities.rst index 6d9d85d..da8f775 100644 --- a/docs/userdocs/user_guide/utilities.rst +++ b/docs/userdocs/user_guide/utilities.rst @@ -68,9 +68,8 @@ For Example: Sub-indexing ------------ -The :func:`~ncdata.utils.index_by_dimensions` function and the -:class:`~ncdata.utils.Slicer` class let you extract a subset of a dataset, by indexing -on the data dimensions. 
This is analagous to sub-indexing an array. +A new dataset can be derived by indexing over dimensions, analagous to sub-indexing +an array. This operation indexes all the variables appropriately, to produce a new, independent dataset which is complete and self-consistent. diff --git a/lib/ncdata/utils/_save_errors.py b/lib/ncdata/utils/_save_errors.py index e542281..5670e8b 100644 --- a/lib/ncdata/utils/_save_errors.py +++ b/lib/ncdata/utils/_save_errors.py @@ -181,7 +181,7 @@ def _save_errors_inner( def save_errors(ncdata: NcData) -> List[str]: """ - Scan a dataset for its consistency and completeness. + Scan a dataset for consistency and completeness. See: :ref:`correctness-checks` diff --git a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py index ea536a5..099b1d7 100644 --- a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py +++ b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py @@ -4,6 +4,7 @@ """ import numpy as np +import pytest from ncdata.utils import dataset_differences, index_by_dimensions From a0cf155ca47763f2baacaa5bffaa95ef85fdfec2 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Thu, 2 Oct 2025 17:57:55 +0100 Subject: [PATCH 5/7] Further fixes and tweaks. --- docs/userdocs/user_guide/common_operations.rst | 15 +++++++++------ docs/userdocs/user_guide/howtos.rst | 2 +- docs/userdocs/user_guide/utilities.rst | 17 +++++++++-------- lib/ncdata/utils/_compare_nc_datasets.py | 4 ++-- lib/ncdata/utils/_copy.py | 7 ++++--- 5 files changed, 25 insertions(+), 20 deletions(-) diff --git a/docs/userdocs/user_guide/common_operations.rst b/docs/userdocs/user_guide/common_operations.rst index ac737f4..e78dfe2 100644 --- a/docs/userdocs/user_guide/common_operations.rst +++ b/docs/userdocs/user_guide/common_operations.rst @@ -125,14 +125,17 @@ Equality Testing ---------------- We implement equality operations ``==`` / ``!=`` for all the core data objects. 
-However, simple equality testing on :class:`@ncdata.NcData` and :class:`@ncdata.NcVariable` -objects can be very costly if it requires comparing large data arrays. +.. warning:: + The equality testing actually calls the :func:`ncdata.utils.dataset_differences` and + :func:`ncdata.utils.variable_differences` utility functions. + + This can be very costly if it needs to compare large data arrays. -If you need to avoid comparing large (and possibly lazy) arrays then you can use the +If you need to avoid comparing large (and possibly lazy) arrays then you should use the :func:`ncdata.utils.dataset_differences` and -:func:`ncdata.utils.variable_differences` utility functions. -These functions also provide multiple options to enable more tolerant comparison, -such as allowing variables to have a different ordering. +:func:`ncdata.utils.variable_differences` utility functions directly. +These enable you to use the provided tolerance options, such as ignoring differences in +data content, or accepting that attributes are present in a different order. See: :ref:`utils_equality` diff --git a/docs/userdocs/user_guide/howtos.rst b/docs/userdocs/user_guide/howtos.rst index f821817..7b7a34a 100644 --- a/docs/userdocs/user_guide/howtos.rst +++ b/docs/userdocs/user_guide/howtos.rst @@ -413,7 +413,7 @@ You can also slice data directly, which simply acts on the dimensions in order: >>> data_region_2 == data_region True -See: :ref:`indexing_overview` +See: :ref:`utils_indexing` Read data from a NetCDF file diff --git a/docs/userdocs/user_guide/utilities.rst b/docs/userdocs/user_guide/utilities.rst index da8f775..942db7e 100644 --- a/docs/userdocs/user_guide/utilities.rst +++ b/docs/userdocs/user_guide/utilities.rst @@ -20,8 +20,6 @@ The functions :func:`~ncdata.utils.dataset_differences` and :func:`~ncdata.utils.variable_differences` produce a list of messages detailing all the ways in which two datasets are different. -See: :ref:`equality_checks` - For Example: ^^^^^^^^^^^^ .. 
testsetup:: @@ -58,13 +56,16 @@ For Example: :func:`~ncdata.utils.variable_differences` is also provided. .. note:: - The ``==`` and ``!-`` operations on :class:`ncdata.NcData` and - :class:`ncdata.NcVariable` are implemented to call these utility functions. - However, lacking a keyword interface to enable any tolerance options, the operations - compare absolutely everything, and so can be very performance intensive if large data - arrays are present. + The ``==`` and ``!=`` operations on :class:`ncdata.NcData` and + :class:`ncdata.NcVariable` simply call these utility functions to see if there are + any differences. + + .. warning:: + As they lack a keyword interface, these operations have no tolerance options + and check absolutely everything. This includes full data-array comparisons, + which could be very costly in time or space if data arrays are large. -.. _indexing_overview: +.. _utils_indexing: Sub-indexing ------------ diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index 9b47244..2084cac 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -38,7 +38,7 @@ def dataset_differences( :class:`~ncdata.NcData` objects. File paths are opened with the :mod:`netCDF4` module. - See: :ref:`equality_checks` + See: :ref:`equality_testing` Parameters ---------- @@ -328,7 +328,7 @@ def variable_differences( r""" Compare variables. - See: :ref:`equality_checks` + See: :ref:`equality_testing` Parameters ---------- diff --git a/lib/ncdata/utils/_copy.py b/lib/ncdata/utils/_copy.py index e68ee69..7ae7c04 100644 --- a/lib/ncdata/utils/_copy.py +++ b/lib/ncdata/utils/_copy.py @@ -32,22 +32,23 @@ def ncdata_copy(ncdata: NcData) -> NcData: Notes ----- This operation is now also available as an object method: - :meth:`~ncdata.NcData.copy`. For example: + :meth:`~ncdata.NcData.copy`. Syntactically, this is generally more convenient, but the operation is identical. + For example: .. 
testsetup:: >>> from ncdata import NcData - >>> from ncdata.utils import dataset_differences, ncdata_copy + >>> from ncdata.utils import ncdata_copy >>> data = NcData() .. doctest:: >>> data1 = ncdata_copy(data) >>> data2 = data.copy() - >>> dataset_differences(data1, data2) == [] + >>> data1 == data2 True """ From 915d9b70242df40390bc9da4efeef155bc2c4539 Mon Sep 17 00:00:00 2001 From: Patrick Peglar Date: Fri, 3 Oct 2025 10:35:13 +0100 Subject: [PATCH 6/7] More improvements. --- docs/changelog_fragments/68.feat.rst | 2 +- docs/details/developer_notes.rst | 6 +-- .../userdocs/user_guide/common_operations.rst | 47 ++++++++++++++--- docs/userdocs/user_guide/data_objects.rst | 34 +++++++------ docs/userdocs/user_guide/howtos.rst | 50 +++++++++++++------ docs/userdocs/user_guide/utilities.rst | 21 ++++---- lib/ncdata/utils/_compare_nc_datasets.py | 17 +++++++ 7 files changed, 127 insertions(+), 50 deletions(-) diff --git a/docs/changelog_fragments/68.feat.rst b/docs/changelog_fragments/68.feat.rst index 20eae62..1d8fed0 100644 --- a/docs/changelog_fragments/68.feat.rst +++ b/docs/changelog_fragments/68.feat.rst @@ -3,4 +3,4 @@ The :class:`ncdata.NcData` objects can be indexed with the ``[]`` operation, or specifed dimensions with the :meth:`~ncdata.NcData.slicer` method. This is based on the new :meth:`~ncdata.utils.index_by_dimensions()` utility method and :class:`~ncdata.utils.Slicer` class. -See: :ref:`indexing_overview` \ No newline at end of file +See: :ref:`utils_indexing` \ No newline at end of file diff --git a/docs/details/developer_notes.rst b/docs/details/developer_notes.rst index c962019..d0b052a 100644 --- a/docs/details/developer_notes.rst +++ b/docs/details/developer_notes.rst @@ -46,7 +46,7 @@ For a full docs-build: .. note:: - * the above is just for *local testing*, when required. + * the above is just for **local testing**, when required. * For PRs (and releases), we also provide *automatic* builds on GitHub, via ReadTheDocs_. 
@@ -56,9 +56,9 @@ Release actions #. Update the :ref:`change-log page ` in the details section - #. ensure all major changes + PRs are referenced in the :ref:`change_notes` section. + #. start with ``$ towncrier build`` - * The starting point for this is now just : ``$ towncrier build``. + #. ensure all major changes + PRs are referenced in the :ref:`change_notes` section. #. update the "latest version" stated in the :ref:`development_status` section diff --git a/docs/userdocs/user_guide/common_operations.rst b/docs/userdocs/user_guide/common_operations.rst index e78dfe2..454b1d3 100644 --- a/docs/userdocs/user_guide/common_operations.rst +++ b/docs/userdocs/user_guide/common_operations.rst @@ -55,6 +55,17 @@ Example : >>> dataset.variables["x"].avals["units"] = "m s-1" + +There is also an :meth:`~ncdata.NameMap.addall` method, which adds multiple content +objects in one operation. + +.. doctest:: python + + >>> vars = [NcVariable(name) for name in ("a", "b", "c")] + >>> dataset.variables.addall(vars) + >>> list(dataset.variables) + ['x', 'a', 'b', 'c'] + .. _operations_rename: Rename @@ -69,6 +80,18 @@ Example : >>> dataset.variables.rename("x", "y") +result: + +.. doctest:: python + + >>> print(dataset.variables.get("x")) + None + >>> print(dataset.variables.get("y")) + ): y() + y:units = 'm s-1' + > + + .. warning:: Renaming a dimension will not rename references to it (i.e. in variables), which obviously may cause problems. @@ -125,17 +148,29 @@ Equality Testing ---------------- We implement equality operations ``==`` / ``!=`` for all the core data objects. +.. doctest:: + + >>> vA = dataset.variables["a"] + >>> vB = dataset.variables["b"] + >>> vA == vB + False + +.. doctest:: + + >>> dataset == dataset.copy() + True + .. warning:: - The equality testing actually calls the :func:`ncdata.utils.dataset_differences` and - :func:`ncdata.utils.variable_differences` utility functions. 
+ Equality testing for :class:`~ncdata.NcData` and :class:`~ncdata.NcVariable` actually + calls the :func:`ncdata.utils.dataset_differences` and + :func:`ncdata.utils.variable_differences` utilities. This can be very costly if it needs to compare large data arrays. -If you need to avoid comparing large (and possibly lazy) arrays then you should use the +If you need to avoid comparing large (and possibly lazy) arrays then you can use the :func:`ncdata.utils.dataset_differences` and -:func:`ncdata.utils.variable_differences` utility functions directly. -These enable you to use the provided tolerance options, such as ignoring differences in -data content, or accepting that attributes are present in a different order. +:func:`ncdata.utils.variable_differences` utility functions directly instead. +These provide a ``check_var_data=False`` option, to ignore differences in data content. See: :ref:`utils_equality` diff --git a/docs/userdocs/user_guide/data_objects.rst b/docs/userdocs/user_guide/data_objects.rst index 28b3f77..3b4d914 100644 --- a/docs/userdocs/user_guide/data_objects.rst +++ b/docs/userdocs/user_guide/data_objects.rst @@ -186,7 +186,9 @@ However, for most operations on attributes, it is much easier to use the ``.aval property instead. This accesses *the same attributes*, but in the form of a simple "name: value" dictionary. -Thus for example, to fetch an attribute you would usually write just : +Get attribute value +^^^^^^^^^^^^^^^^^^^ +For example, to fetch an attribute you would usually write just : .. testsetup:: @@ -205,23 +207,15 @@ and **not** : .. doctest:: python - >>> # WRONG: this reads an NcAttribute, not its value + >>> # WRONG: this gets the NcAttribute object, not its value >>> unit = dataset.variables["x"].attributes["units"] -or: - -.. 
doctest:: python - - >>> # WRONG: this gets NcAttribute.value as a character array, not a string + >>> # WRONG: this returns a character array, not a string >>> unit = dataset.variables["x"].attributes["units"].value -or even (which is at least correct): - -.. doctest:: python - - >>> unit = dataset.variables["x"].attributes["units"].as_python_value() - +Set attribute value +^^^^^^^^^^^^^^^^^^^ Likewise, to **set** a value, you would normally just .. doctest:: python @@ -236,9 +230,11 @@ and **not** >>> dataset.variables["x"].attributes["units"].value = "K" -Note also, that as the ``.avals`` is a dictionary, you can use standard dictionary -methods such as ``update`` and ``get`` to perform other operations in a relatively -natural, Pythonic way. +``.avals`` as a dictionary +^^^^^^^^^^^^^^^^^^^^^^^^^^ +Note also, that as ``.avals`` is a dictionary, you can use standard dictionary +methods such as ``pop``, ``update`` and ``get`` to perform other operations in a +relatively natural, Pythonic way. .. doctest:: python @@ -247,6 +243,12 @@ natural, Pythonic way. >>> dataset.attributes.update({"experiment": "A407", "expt_run": 704}) +.. note:: + The new ``.avals`` property effectively replaces the old + :meth:`~ncdata.NcData.get_attrval` and :meth:`~ncdata.NcData.set_attrval` methods, + which are now deprecated and will eventually be removed. + + .. _data-constructors: Core Object Constructors diff --git a/docs/userdocs/user_guide/howtos.rst b/docs/userdocs/user_guide/howtos.rst index 7b7a34a..9cfebbd 100644 --- a/docs/userdocs/user_guide/howtos.rst +++ b/docs/userdocs/user_guide/howtos.rst @@ -377,8 +377,8 @@ See: :ref:`copy_notes` Extract a subsection by indexing -------------------------------- -The nicest way is usually just to use the :meth:`~ncdata.Ncdata.slicer` method to specify -dimensions to index, and then index the result. 
+The nicest way is usually to use the NcData :meth:`~ncdata.NcData.slicer` method to +specify dimensions to index, and then index the result. .. testsetup:: @@ -388,22 +388,22 @@ dimensions to index, and then index the result. >>> for nn, dim in full_data.dimensions.items(): ... full_data.variables.add(NcVariable(nn, dimensions=[nn], data=np.arange(dim.size))) -.. doctest:: - - >>> for dimname in full_data.dimensions: - ... print(dimname, ':', full_data.variables[dimname].data) - x : [0 1 2 3 4 5 6] - y : [0 1 2 3 4 5] - .. doctest:: >>> data_region = full_data.slicer("y", "x")[3, 1::2] +effect: + .. doctest:: + >>> for dimname in full_data.dimensions: + ... print("(original)", dimname, ':', full_data.variables[dimname].data) + (original) x : [0 1 2 3 4 5 6] + (original) y : [0 1 2 3 4 5] + >>> for dimname in data_region.dimensions: - ... print(dimname, ':', data_region.variables[dimname].data) - x : [1 3 5] + ... print("(new)", dimname, ':', data_region.variables[dimname].data) + (new) x : [1 3 5] You can also slice data directly, which simply acts on the dimensions in order: @@ -454,8 +454,8 @@ Use the ``dim_chunks`` argument in the :func:`ncdata.netcdf4.from_nc4` function >>> from ncdata.netcdf4 import from_nc4 >>> ds = from_nc4(filepath, dim_chunks={"time": 3}) - >>> print(ds.variables["time"].data.chunksize) - (3,) + >>> print(ds.variables["time"].data.chunks) + ((3, 3, 3, 1),) Save data to a new file @@ -531,8 +531,28 @@ Use :func:`ncdata.xarray.to_xarray` and :func:`ncdata.xarray.from_xarray`. >>> from ncdata.xarray import from_xarray, to_xarray >>> dataset = xarray.open_dataset(filepath) >>> ncdata = from_xarray(dataset) - >>> + + >>> print(ncdata) + + variables: + + + global attributes: + :experiment = 'A301.7' + > + >>> ds2 = to_xarray(ncdata) + >>> print(ds2) + Size: 8B + Dimensions: () + Data variables: + vx float64 8B nan + Attributes: + experiment: A301.7 Note that: @@ -573,7 +593,7 @@ passed using specific dictionary keywords, e.g. ... 
iris_load_kwargs={'constraints': 'air_temperature'}, ... xr_save_kwargs={'unlimited_dims': ('time',)}, ... ) - ... + Combine data from different input files into one output ------------------------------------------------------- diff --git a/docs/userdocs/user_guide/utilities.rst b/docs/userdocs/user_guide/utilities.rst index 942db7e..e8b6faf 100644 --- a/docs/userdocs/user_guide/utilities.rst +++ b/docs/userdocs/user_guide/utilities.rst @@ -18,7 +18,7 @@ Dataset Equality Testing ------------------------ The functions :func:`~ncdata.utils.dataset_differences` and :func:`~ncdata.utils.variable_differences` produce a list of messages detailing all the -ways in which two datasets are different. +ways in which two datasets or variables are different. For Example: ^^^^^^^^^^^^ @@ -51,19 +51,22 @@ For Example: Dataset "x" dimension has different "unlimited" status : False != True Dataset variable "vx" shapes differ : (5,) != (2,) -.. note:: - To compare isolated variables, the subsidiary routine - :func:`~ncdata.utils.variable_differences` is also provided. +For a short-form test that two things are the same, you can just check that the +results ``== []``. + +By default, these functions compare **everything** about the two arguments. +However, they also have multiple keywords which allow certain *types* of differences to +be ignored, E.G. ``check_dims_order=False``, ``check_var_data=False``. .. note:: The ``==`` and ``!=`` operations on :class:`ncdata.NcData` and - :class:`ncdata.NcVariable` simply call these utility functions to see if there are - any differences. + :class:`ncdata.NcVariable` use these utility functions to check for differences. .. warning:: - As they lack a keyword interface, these operations have no tolerance options - and check absolutely everything. This includes full data-array comparisons, - which could be very costly in time or space if data arrays are large. 
+ As they lack a keyword interface, these operations provide no tolerance options, + so they always check absolutely everything. Especially, they perform **full + data-array comparisons**, which can have very high performance costs if data + arrays are large. .. _utils_indexing: diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index 2084cac..e123830 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ b/lib/ncdata/utils/_compare_nc_datasets.py @@ -96,6 +96,23 @@ def dataset_differences( A list of "error" strings, describing differences between the inputs. If empty, no differences were found. + Examples + -------- + .. doctest:: + + >>> data = NcData( + ... name="a", + ... variables=[NcVariable("b", data=[1, 2, 3, 4])], + ... attributes={"a1": 4} + ... ) + >>> data2 = data.copy() + >>> data2.avals.update({"a1":3, "v":7}) + >>> data2.variables["b"].data = np.array([1, 7, 3, 99]) # must be an array! + >>> print('\n'.join(dataset_differences(data, data2))) + Dataset attribute lists do not match: ['a1'] != ['a1', 'v'] + Dataset "a1" attribute values differ : 4 != 3 + Dataset variable "b" data contents differ, at 2 points: @INDICES[(1,), (3,)] : LHS=[2, 4], RHS=[7, 99] + See Also -------- :func:`~ncdata.utils.variable_differences` From da03eac11b7f92b2ab612c977fe11bfe6006b5e2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Oct 2025 11:45:10 +0000 Subject: [PATCH 7/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/ncdata/utils/_compare_nc_datasets.py | 1 - lib/ncdata/utils/_save_errors.py | 1 - tests/unit/utils/dim_indexing/test_index_by_dimensions.py | 1 - 3 files changed, 3 deletions(-) diff --git a/lib/ncdata/utils/_compare_nc_datasets.py b/lib/ncdata/utils/_compare_nc_datasets.py index e123830..1d1d1ff 100644 --- a/lib/ncdata/utils/_compare_nc_datasets.py +++ 
b/lib/ncdata/utils/_compare_nc_datasets.py @@ -14,7 +14,6 @@ import netCDF4 import netCDF4 as nc import numpy as np - from ncdata import NcData, NcVariable diff --git a/lib/ncdata/utils/_save_errors.py b/lib/ncdata/utils/_save_errors.py index 5670e8b..2002245 100644 --- a/lib/ncdata/utils/_save_errors.py +++ b/lib/ncdata/utils/_save_errors.py @@ -4,7 +4,6 @@ import netCDF4 as nc import numpy as np - from ncdata import NcData, NcVariable diff --git a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py index 099b1d7..a0c2200 100644 --- a/tests/unit/utils/dim_indexing/test_index_by_dimensions.py +++ b/tests/unit/utils/dim_indexing/test_index_by_dimensions.py @@ -5,7 +5,6 @@ import numpy as np import pytest - from ncdata.utils import dataset_differences, index_by_dimensions from . import ( # noqa: F401