NCAS-CMS
diff --git a/‎Changelog.rst‎
Lines changed: 5 additions & 0 deletions b/‎Changelog.rst‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 7 deletions b/‎README.md‎
Lines changed: 3 additions & 7 deletions
diff --git a/‎cf/data/dask_regrid.py‎
Lines changed: 11 additions & 17 deletions b/‎cf/data/dask_regrid.py‎
Lines changed: 11 additions & 17 deletions
diff --git a/‎cf/docstring/docstring.py‎
Lines changed: 46 additions & 4 deletions b/‎cf/docstring/docstring.py‎
Lines changed: 46 additions & 4 deletions
diff --git a/‎cf/field.py‎
Lines changed: 30 additions & 19 deletions b/‎cf/field.py‎
Lines changed: 30 additions & 19 deletions
diff --git a/‎cf/mixin/propertiesdatabounds.py‎
Lines changed: 1 addition & 16 deletions b/‎cf/mixin/propertiesdatabounds.py‎
Lines changed: 1 addition & 16 deletions
@@ -5,8 +5,13 @@ Version NEXTVERSION
 
 * Python 3.9 support removed
   (https://github.com/NCAS-CMS/cf-python/issues/896)
+* Allow regridding for very large grids. New keyword parameter to
+  `cf.Field.regrids` and `cf.Field.regridc`: ``dst_grid_partitions``
+  (https://github.com/NCAS-CMS/cf-python/issues/878)
 * Changed dependency: ``Python>=3.10.0``
 
+----
+
 Version 3.18.1
 --------------
 
 
@@ -111,7 +111,7 @@ of its array manipulation and can:
 * regrid structured grid, mesh and DSG field constructs with
   (multi-)linear, nearest neighbour, first- and second-order
   conservative and higher order patch recovery methods, including 3-d
-  regridding,
+  regridding, and large-grid support,
 * apply convolution filters to field constructs,
 * create running means from field constructs,
 * apply differential operators to field constructs,
@@ -125,12 +125,8 @@ Visualization
 Powerful and flexible visualizations of `cf` field constructs,
 designed to be produced and configured in as few lines of code as
 possible, are available with the [cf-plot
-package](https://ncas-cms.github.io/cf-plot/build/index.html), which
-needs to be installed separately to the `cf` package.
-
-See the [cf-plot
-gallery](https://ncas-cms.github.io/cf-plot/build/gallery.html) for a
-range of plotting possibilities with example code.
+package](https://ncas-cms.github.io/cf-plot), which needs to be
+installed separately to the `cf` package.
 
 ![Example outputs of cf-plot displaying selected aspects of `cf` field constructs](https://raw.githubusercontent.com/NCAS-CMS/cf-plot/master/docs/source/images/cf_gallery_image.png)
 
 
@@ -507,10 +507,10 @@ def _regrid(
             #       'weights.indptr', 'weights.indices', and
             #       'weights.data' directly, rather than iterating
             #       over rows of 'weights' and using
-            #       'weights.getrow'. Also, 'np.count_nonzero' is much
-            #       faster than 'np.any' and 'np.all'.
+            #       'weights.getrow'. Also, `np.count_nonzero` is much
+            #       faster than `np.any` and `np.all`.
             count_nonzero = np.count_nonzero
-            indptr = weights.indptr.tolist()
+            indptr = weights.indptr
             indices = weights.indices
             data = weights.data
             for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
@@ -529,8 +529,6 @@ def _regrid(
                 w[mask] = 0
                 data[i0:i1] = w
 
-            del indptr
-
         elif method in ("linear", "bilinear"):
             # 2) Linear methods:
             #
@@ -549,23 +547,21 @@ def _regrid(
             #       'weights.indptr', 'weights.indices', and
             #       'weights.data' directly, rather than iterating
             #       over rows of 'weights' and using
-            #       'weights.getrow'. Also, 'np.count_nonzero' is much
-            #       faster than 'np.any' and 'np.all'.
+            #       'weights.getrow'. Also, `np.count_nonzero` is much
+            #       faster than `np.any` and `np.all`.
             count_nonzero = np.count_nonzero
             where = np.where
-            indptr = weights.indptr.tolist()
+            indptr = weights.indptr
             indices = weights.indices
-            pos_data = weights.data >= min_weight
+            data = weights.data
             for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
                 mask = src_mask[indices[i0:i1]]
                 if not count_nonzero(mask):
                     continue
 
-                if where((mask) & (pos_data[i0:i1]))[0].size:
+                if where(data[i0:i1][mask] >= min_weight)[0].size:
                     dst_mask[j] = True
 
-            del indptr, pos_data
-
         elif method == "nearest_dtos":
             # 3) Nearest neighbour dtos method:
             #
@@ -584,10 +580,10 @@ def _regrid(
             #       'weights.indptr', 'weights.indices', and
             #       'weights.data' directly, rather than iterating
             #       over rows of 'weights' and using
-            #       'weights.getrow'. Also, 'np.count_nonzero' is much
-            #       faster than 'np.any' and 'np.all'.
+            #       'weights.getrow'. Also, `np.count_nonzero` is much
+            #       faster than `np.any` and `np.all`.
             count_nonzero = np.count_nonzero
-            indptr = weights.indptr.tolist()
+            indptr = weights.indptr
             indices = weights.indices
             for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
                 mask = src_mask[indices[i0:i1]]
@@ -597,8 +593,6 @@ def _regrid(
                 elif n_masked:
                     weights.data[np.arange(i0, i1)[mask]] = 0
 
-            del indptr
-
         elif method in (
             "patch",
             "conservative_2nd",
 
@@ -74,7 +74,11 @@
         weights with the source data. (Note that whilst the `esmpy`
         package is also able to create the regridded data from its
         weights, this feature can't be integrated with the `dask`
-        framework that underpins the field's data.)""",
+        framework that underpins the field's data.)
+
+        The calculation of weights for large grids can have a very
+        high memory requirement, but this can be reduced by setting
+        the *dst_grid_partitions* parameter.""",
     # regrid Logging
     "{{regrid Logging}}": """**Logging**
 
@@ -436,9 +440,10 @@
 
                 **Performance**
 
-                The computation of the weights can be much more costly
-                than the regridding itself, in which case reading
-                pre-calculated weights can improve performance.
+                The computation of the weights can take much longer,
+                and take much more memory, than the regridding itself,
+                in which case reading pre-calculated weights can
+                improve performance.
 
                 Ignored if *dst* is a `RegridOperator`.""",
     # aggregated_units
@@ -564,6 +569,43 @@
                 If True then do not perform the regridding, rather
                 return the `esmpy.Regrid` instance that defines the
                 regridding operation.""",
+    # dst_grid_partitions
+    "{{dst_grid_partitions: `int` or `str`, optional}}": """dst_grid_partitions: `int` or `str`, optional
+            Calculating the weights matrix for grids with a very large
+            number of source and/or destination grid points can
+            potentially require more memory than is
+            available. However, the memory requirement can be greatly
+            reduced by calculating weights separately for
+            non-overlapping partitions of the destination grid, and
+            then combining the weights from each partition to create
+            the final weights matrix. The more partitions there are,
+            the smaller the memory requirement for the weights
+            calculations, at the expense of the weights calculations
+            taking longer.
+
+            The *dst_grid_partitions* parameter sets the number of
+            destination grid partitions for the weights
+            calculations. The default value is ``1``, i.e. one
+            partition for the entire destination grid, maximising
+            memory usage and minimising the calculation time. If the
+            string ``'maximum'`` is given then the largest possible
+            number of partitions of the destination grid will be used,
+            minimising memory usage and maximising the calculation
+            time. A positive integer specifies the exact number of
+            partitions, capped at the maximum possible number, so that
+            the balance between memory usage and calculation time can
+            be adjusted.
+
+            The actual number of destination grid partitions, and
+            each partition's shape, weights calculation time and
+            memory requirement, are displayed when ``'DEBUG'``
+            logging is activated. See *verbose* for details.
+
+            .. note:: If setting *dst_grid_partitions* is required for
+                      the regridding to work, then it is worth
+                      considering storing the weights in a file for
+                      fast future access, via the *weights_file*
+                      parameter.""",
     # ----------------------------------------------------------------
     # Method description substitutions (4 levels of indentation)
     # ----------------------------------------------------------------
 
@@ -382,14 +382,6 @@ def __getitem__(self, indices):
         (6, 4, 3)
 
         """
-        debug = is_log_level_debug(logger)
-
-        if debug:
-            logger.debug(
-                self.__class__.__name__ + ".__getitem__"
-            )  # pragma: no cover
-            logger.debug(f"    input indices = {indices}")  # pragma: no cover
-
         if indices is Ellipsis:
             return self.copy()
 
@@ -437,12 +429,6 @@ def __getitem__(self, indices):
         else:
             findices = indices
 
-        if debug:
-            logger.debug(f"    shape    = {shape}")  # pragma: no cover
-            logger.debug(f"    indices  = {indices}")  # pragma: no cover
-            logger.debug(f"    indices2 = {indices2}")  # pragma: no cover
-            logger.debug(f"    findices = {findices}")  # pragma: no cover
-
         new_data = data[tuple(findices)]
 
         if 0 in new_data.shape:
@@ -496,11 +482,6 @@ def __getitem__(self, indices):
                     else:
                         dice.append(slice(None))
 
-                if debug:
-                    logger.debug(
-                        f"    dice = {tuple(dice)}"
-                    )  # pragma: no cover
-
                 # Generally we do not apply an ancillary mask to the
                 # metadata items, but for DSGs we do.
                 if ancillary_mask and new.DSG:
@@ -12985,6 +12966,7 @@ def regrids(
         ln_z=None,
         verbose=None,
         return_esmpy_regrid_operator=False,
+        dst_grid_partitions=1,
         inplace=False,
         i=False,
         _compute_field_mass=None,
@@ -13229,6 +13211,17 @@ def regrids(
 
                 .. versionadded:: 3.16.2
 
+            {{dst_grid_partitions: `int` or `str`, optional}}
+
+                The maximum number of partitions, Nmax, depends on the
+                nature of the destination grid: If the Z axis is being
+                regridded, Nmax = the size of the Z axis. For a 2-d
+                structured grid, Nmax = the size of the Y axis. For a
+                UGRID, HEALPix, or DSG grid, Nmax = the size of the
+                horizontal discrete axis.
+
+                .. versionadded:: NEXTVERSION
+
             axis_order: sequence, optional
                 Deprecated at version 3.14.0.
 
@@ -13322,11 +13315,13 @@ def regrids(
             z=z,
             ln_z=ln_z,
             return_esmpy_regrid_operator=return_esmpy_regrid_operator,
+            dst_grid_partitions=dst_grid_partitions,
             inplace=inplace,
         )
 
     @_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0")
     @_inplace_enabled(default=False)
+    @_manage_log_level_via_verbosity
     def regridc(
         self,
         dst,
@@ -13346,6 +13341,8 @@ def regridc(
         z=None,
         ln_z=None,
         return_esmpy_regrid_operator=False,
+        dst_grid_partitions=1,
+        verbose=None,
         inplace=False,
         i=False,
         _compute_field_mass=None,
@@ -13525,6 +13522,19 @@ def regridc(
 
                 .. versionadded:: 3.16.2
 
+            {{dst_grid_partitions: `int` or `str`, optional}}
+
+                Partitioning is only available for 2-d or 3-d
+                regridding. The maximum number of partitions is the
+                size of the first of the destination grid axes
+                specified by the *axes* parameter.
+
+                .. versionadded:: NEXTVERSION
+
+            {{verbose: `int` or `str` or `None`, optional}}
+
+                .. versionadded:: NEXTVERSION
+
             axis_order: sequence, optional
                 Deprecated at version 3.14.0.
 
@@ -13617,6 +13627,7 @@ def regridc(
             z=z,
             ln_z=ln_z,
             return_esmpy_regrid_operator=return_esmpy_regrid_operator,
+            dst_grid_partitions=dst_grid_partitions,
             inplace=inplace,
         )
 
 
@@ -1,7 +1,7 @@
 import logging
 
 import numpy as np
-from cfdm import is_log_level_debug, is_log_level_info
+from cfdm import is_log_level_info
 
 from ..data import Data
 from ..decorators import (
@@ -81,15 +81,6 @@ def __getitem__(self, indices):
         else:
             findices = tuple(indices)
 
-        cname = self.__class__.__name__
-        if is_log_level_debug(logger):
-            logger.debug(
-                f"{cname}.__getitem__: shape    = {self.shape}\n"
-                f"{cname}.__getitem__: indices2 = {indices2}\n"
-                f"{cname}.__getitem__: indices  = {indices}\n"
-                f"{cname}.__getitem__: findices = {findices}"
-            )  # pragma: no cover
-
         data = self.get_data(None, _fill_value=False)
         if data is not None:
             new_data = data[findices]
@@ -133,12 +124,6 @@ def __getitem__(self, indices):
                         mask.insert_dimension(-1) for mask in findices[1]
                     ]
 
-                if is_log_level_debug(logger):
-                    logger.debug(
-                        f"{self.__class__.__name__}.__getitem__: findices for "
-                        f"bounds = {tuple(findices)}"
-                    )  # pragma: no cover
-
                 new.bounds.set_data(bounds_data[tuple(findices)], copy=False)
 
         # Remove the direction, as it may now be wrong