Skip to content

Commit 09ec18e

Browse files
Merge pull request #893 from davidhassell/regrid-weights-chunks
Allow regridding for very large grids
2 parents ed814fa + 3acbdf0 commit 09ec18e

19 files changed

+1546
-620
lines changed

Changelog.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,13 @@ Version NEXTVERSION
55

66
* Python 3.9 support removed
77
(https://github.com/NCAS-CMS/cf-python/issues/896)
8+
* Allow regridding for very large grids. New keyword parameter to
9+
`cf.Field.regrids` and `cf.Field.regridc`: ``dst_grid_partitions``
10+
(https://github.com/NCAS-CMS/cf-python/issues/878)
811
* Changed dependency: ``Python>=3.10.0``
912

13+
----
14+
1015
Version 3.18.1
1116
--------------
1217

README.md

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ of its array manipulation and can:
111111
* regrid structured grid, mesh and DSG field constructs with
112112
(multi-)linear, nearest neighbour, first- and second-order
113113
conservative and higher order patch recovery methods, including 3-d
114-
regridding,
114+
regridding, and large-grid support,
115115
* apply convolution filters to field constructs,
116116
* create running means from field constructs,
117117
* apply differential operators to field constructs,
@@ -125,12 +125,8 @@ Visualization
125125
Powerful and flexible visualizations of `cf` field constructs,
126126
designed to be produced and configured in as few lines of code as
127127
possible, are available with the [cf-plot
128-
package](https://ncas-cms.github.io/cf-plot/build/index.html), which
129-
needs to be installed separately to the `cf` package.
130-
131-
See the [cf-plot
132-
gallery](https://ncas-cms.github.io/cf-plot/build/gallery.html) for a
133-
range of plotting possibilities with example code.
128+
package](https://ncas-cms.github.io/cf-plot), which needs to be
129+
installed separately to the `cf` package.
134130

135131
![Example outputs of cf-plot displaying selected aspects of `cf` field constructs](https://raw.githubusercontent.com/NCAS-CMS/cf-plot/master/docs/source/images/cf_gallery_image.png)
136132

cf/data/dask_regrid.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -507,10 +507,10 @@ def _regrid(
507507
# 'weights.indptr', 'weights.indices', and
508508
# 'weights.data' directly, rather than iterating
509509
# over rows of 'weights' and using
510-
# 'weights.getrow'. Also, 'np.count_nonzero' is much
511-
# faster than 'np.any' and 'np.all'.
510+
# 'weights.getrow'. Also, `np.count_nonzero` is much
511+
# faster than `np.any` and `np.all`.
512512
count_nonzero = np.count_nonzero
513-
indptr = weights.indptr.tolist()
513+
indptr = weights.indptr
514514
indices = weights.indices
515515
data = weights.data
516516
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
@@ -529,8 +529,6 @@ def _regrid(
529529
w[mask] = 0
530530
data[i0:i1] = w
531531

532-
del indptr
533-
534532
elif method in ("linear", "bilinear"):
535533
# 2) Linear methods:
536534
#
@@ -549,23 +547,21 @@ def _regrid(
549547
# 'weights.indptr', 'weights.indices', and
550548
# 'weights.data' directly, rather than iterating
551549
# over rows of 'weights' and using
552-
# 'weights.getrow'. Also, 'np.count_nonzero' is much
553-
# faster than 'np.any' and 'np.all'.
550+
# 'weights.getrow'. Also, `np.count_nonzero` is much
551+
# faster than `np.any` and `np.all`.
554552
count_nonzero = np.count_nonzero
555553
where = np.where
556-
indptr = weights.indptr.tolist()
554+
indptr = weights.indptr
557555
indices = weights.indices
558-
pos_data = weights.data >= min_weight
556+
data = weights.data
559557
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
560558
mask = src_mask[indices[i0:i1]]
561559
if not count_nonzero(mask):
562560
continue
563561

564-
if where((mask) & (pos_data[i0:i1]))[0].size:
562+
if where(data[i0:i1][mask] >= min_weight)[0].size:
565563
dst_mask[j] = True
566564

567-
del indptr, pos_data
568-
569565
elif method == "nearest_dtos":
570566
# 3) Nearest neighbour dtos method:
571567
#
@@ -584,10 +580,10 @@ def _regrid(
584580
# 'weights.indptr', 'weights.indices', and
585581
# 'weights.data' directly, rather than iterating
586582
# over rows of 'weights' and using
587-
# 'weights.getrow'. Also, 'np.count_nonzero' is much
588-
# faster than 'np.any' and 'np.all'.
583+
# 'weights.getrow'. Also, `np.count_nonzero` is much
584+
# faster than `np.any` and `np.all`.
589585
count_nonzero = np.count_nonzero
590-
indptr = weights.indptr.tolist()
586+
indptr = weights.indptr
591587
indices = weights.indices
592588
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
593589
mask = src_mask[indices[i0:i1]]
@@ -597,8 +593,6 @@ def _regrid(
597593
elif n_masked:
598594
weights.data[np.arange(i0, i1)[mask]] = 0
599595

600-
del indptr
601-
602596
elif method in (
603597
"patch",
604598
"conservative_2nd",

cf/docstring/docstring.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,11 @@
7474
weights with the source data. (Note that whilst the `esmpy`
7575
package is also able to create the regridded data from its
7676
weights, this feature can't be integrated with the `dask`
77-
framework that underpins the field's data.)""",
77+
framework that underpins the field's data.)
78+
79+
The calculation of weights for large grids can have a very
80+
high memory requirement, but this can be reduced by setting
81+
the *dst_grid_partitions* parameter.""",
7882
# regrid Logging
7983
"{{regrid Logging}}": """**Logging**
8084
@@ -436,9 +440,10 @@
436440
437441
**Performance**
438442
439-
The computation of the weights can be much more costly
440-
than the regridding itself, in which case reading
441-
pre-calculated weights can improve performance.
443+
The computation of the weights can take much longer,
444+
and take much more memory, than the regridding itself,
445+
in which case reading pre-calculated weights can
446+
improve performance.
442447
443448
Ignored if *dst* is a `RegridOperator`.""",
444449
# aggregated_units
@@ -564,6 +569,43 @@
564569
If True then do not perform the regridding, rather
565570
return the `esmpy.Regrid` instance that defines the
566571
regridding operation.""",
572+
# dst_grid_partitions
573+
"{{dst_grid_partitions: `int` or `str`, optional}}": """dst_grid_partitions: `int` or `str`, optional
574+
Calculating the weights matrix for grids with a very large
575+
number of source and/or destination grid points can
576+
potentially require more memory than is
577+
available. However, the memory requirement can be greatly
578+
reduced by calculating weights separately for
579+
non-overlapping partitions of the destination grid, and
580+
then combining the weights from each partition to create
581+
the final weights matrix. The more partitions there are,
582+
the smaller the memory requirement for the weights
583+
calculations, at the expense of the weights calculations
584+
taking longer.
585+
586+
The *dst_grid_partitions* parameter sets the number of
587+
destination grid partitions for the weights
588+
calculations. The default value is ``1``, i.e. one
589+
partition for the entire destination grid, maximising
590+
memory usage and minimising the calculation time. If the
591+
string ``'maximum'`` is given then the largest possible
592+
number of partitions of the destination grid will be used,
593+
minimising memory usage and maximising the calculation
594+
time. A positive integer specifies the exact number of
595+
partitions, capped by the maximum allowed, allowing the
596+
balance between memory usage and calculation time to be
597+
adjusted.
598+
599+
The actual number of destination grid partitions and each
600+
partition's shape, and weights calculation time and memory
601+
requirement are displayed when ``'DEBUG'`` logging is
602+
activated. See *verbose* for details.
603+
604+
.. note:: If setting *dst_grid_partitions* is required for
605+
the regridding to work, then it is worth
606+
considering storing the weights in a file for
607+
fast future access, via the *weights_file*
608+
parameter.""",
567609
# ----------------------------------------------------------------
568610
# Method description substitutions (4 levels of indentation)
569611
# ----------------------------------------------------------------

cf/field.py

Lines changed: 30 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -382,14 +382,6 @@ def __getitem__(self, indices):
382382
(6, 4, 3)
383383

384384
"""
385-
debug = is_log_level_debug(logger)
386-
387-
if debug:
388-
logger.debug(
389-
self.__class__.__name__ + ".__getitem__"
390-
) # pragma: no cover
391-
logger.debug(f" input indices = {indices}") # pragma: no cover
392-
393385
if indices is Ellipsis:
394386
return self.copy()
395387

@@ -437,12 +429,6 @@ def __getitem__(self, indices):
437429
else:
438430
findices = indices
439431

440-
if debug:
441-
logger.debug(f" shape = {shape}") # pragma: no cover
442-
logger.debug(f" indices = {indices}") # pragma: no cover
443-
logger.debug(f" indices2 = {indices2}") # pragma: no cover
444-
logger.debug(f" findices = {findices}") # pragma: no cover
445-
446432
new_data = data[tuple(findices)]
447433

448434
if 0 in new_data.shape:
@@ -496,11 +482,6 @@ def __getitem__(self, indices):
496482
else:
497483
dice.append(slice(None))
498484

499-
if debug:
500-
logger.debug(
501-
f" dice = {tuple(dice)}"
502-
) # pragma: no cover
503-
504485
# Generally we do not apply an ancillary mask to the
505486
# metadata items, but for DSGs we do.
506487
if ancillary_mask and new.DSG:
@@ -12985,6 +12966,7 @@ def regrids(
1298512966
ln_z=None,
1298612967
verbose=None,
1298712968
return_esmpy_regrid_operator=False,
12969+
dst_grid_partitions=1,
1298812970
inplace=False,
1298912971
i=False,
1299012972
_compute_field_mass=None,
@@ -13229,6 +13211,17 @@ def regrids(
1322913211

1323013212
.. versionadded:: 3.16.2
1323113213

13214+
{{dst_grid_partitions: `int` or `str`, optional}}
13215+
13216+
The maximum number of partitions, Nmax, depends on the
13217+
nature of the destination grid: If the Z axis is being
13218+
regridded, Nmax = the size of the Z axis. For a 2-d
13219+
structured grid, Nmax = the size of the Y axis. For a
13220+
UGRID, HEALPix, or DSG grid, Nmax = the size of the
13221+
horizontal discrete axis.
13222+
13223+
.. versionadded:: NEXTVERSION
13224+
1323213225
axis_order: sequence, optional
1323313226
Deprecated at version 3.14.0.
1323413227

@@ -13322,11 +13315,13 @@ def regrids(
1332213315
z=z,
1332313316
ln_z=ln_z,
1332413317
return_esmpy_regrid_operator=return_esmpy_regrid_operator,
13318+
dst_grid_partitions=dst_grid_partitions,
1332513319
inplace=inplace,
1332613320
)
1332713321

1332813322
@_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0")
1332913323
@_inplace_enabled(default=False)
13324+
@_manage_log_level_via_verbosity
1333013325
def regridc(
1333113326
self,
1333213327
dst,
@@ -13346,6 +13341,8 @@ def regridc(
1334613341
z=None,
1334713342
ln_z=None,
1334813343
return_esmpy_regrid_operator=False,
13344+
dst_grid_partitions=1,
13345+
verbose=None,
1334913346
inplace=False,
1335013347
i=False,
1335113348
_compute_field_mass=None,
@@ -13525,6 +13522,19 @@ def regridc(
1352513522

1352613523
.. versionadded:: 3.16.2
1352713524

13525+
{{dst_grid_partitions: `int` or `str`, optional}}
13526+
13527+
Partitioning is only available for 2-d or 3-d
13528+
regridding. The maximum number of partitions is the
13529+
size of the first of the destination grid axes
13530+
specified by the *axes* parameter.
13531+
13532+
.. versionadded:: NEXTVERSION
13533+
13534+
{{verbose: `int` or `str` or `None`, optional}}
13535+
13536+
.. versionadded:: NEXTVERSION
13537+
1352813538
axis_order: sequence, optional
1352913539
Deprecated at version 3.14.0.
1353013540

@@ -13617,6 +13627,7 @@ def regridc(
1361713627
z=z,
1361813628
ln_z=ln_z,
1361913629
return_esmpy_regrid_operator=return_esmpy_regrid_operator,
13630+
dst_grid_partitions=dst_grid_partitions,
1362013631
inplace=inplace,
1362113632
)
1362213633

cf/mixin/propertiesdatabounds.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import logging
22

33
import numpy as np
4-
from cfdm import is_log_level_debug, is_log_level_info
4+
from cfdm import is_log_level_info
55

66
from ..data import Data
77
from ..decorators import (
@@ -81,15 +81,6 @@ def __getitem__(self, indices):
8181
else:
8282
findices = tuple(indices)
8383

84-
cname = self.__class__.__name__
85-
if is_log_level_debug(logger):
86-
logger.debug(
87-
f"{cname}.__getitem__: shape = {self.shape}\n"
88-
f"{cname}.__getitem__: indices2 = {indices2}\n"
89-
f"{cname}.__getitem__: indices = {indices}\n"
90-
f"{cname}.__getitem__: findices = {findices}"
91-
) # pragma: no cover
92-
9384
data = self.get_data(None, _fill_value=False)
9485
if data is not None:
9586
new_data = data[findices]
@@ -133,12 +124,6 @@ def __getitem__(self, indices):
133124
mask.insert_dimension(-1) for mask in findices[1]
134125
]
135126

136-
if is_log_level_debug(logger):
137-
logger.debug(
138-
f"{self.__class__.__name__}.__getitem__: findices for "
139-
f"bounds = {tuple(findices)}"
140-
) # pragma: no cover
141-
142127
new.bounds.set_data(bounds_data[tuple(findices)], copy=False)
143128

144129
# Remove the direction, as it may now be wrong

0 commit comments

Comments
 (0)