Skip to content

Commit fdce5ce

Browse files
authored
Merge branch 'main' into dask-in-cfdm
2 parents 2d2810e + 2031dd8 commit fdce5ce

16 files changed

+1016
-127
lines changed

Changelog.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ version NEXTVERSION
33

44
**2024-12-??**
55

6+
* Allow ``'nearest_dtos'`` 2-d regridding to work with discrete
7+
sampling geometry source grids
8+
(https://github.com/NCAS-CMS/cf-python/issues/832)
69
* New method: `cf.Field.filled`
710
(https://github.com/NCAS-CMS/cf-python/issues/811)
811
* New method: `cf.Field.is_discrete_axis`
@@ -35,13 +38,15 @@ version NEXTVERSION
3538
* Fix bug where `cf.normalize_slice` doesn't correctly
3639
handle certain cyclic slices
3740
(https://github.com/NCAS-CMS/cf-python/issues/774)
41+
* Fix bug where `cf.Field.subspace` doesn't always correctly handle
42+
global or near-global cyclic subspaces
43+
(https://github.com/NCAS-CMS/cf-python/issues/828)
3844
* New dependency: ``h5netcdf>=1.3.0``
3945
* New dependency: ``h5py>=3.10.0``
4046
* New dependency: ``s3fs>=2024.2.0``
4147
* Changed dependency: ``1.11.2.0<=cfdm<1.11.3.0``
4248
* Changed dependency: ``cfunits>=3.3.7``
4349

44-
4550
----
4651

4752
version 3.16.2

cf/data/dask_regrid.py

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -506,17 +506,20 @@ def _regrid(
506506
# Note: It is much more efficient to access
507507
# 'weights.indptr', 'weights.indices', and
508508
# 'weights.data' directly, rather than iterating
509-
# over rows of 'weights' and using 'weights.getrow'.
509+
# over rows of 'weights' and using
510+
# 'weights.getrow'. Also, 'np.count_nonzero' is much
511+
# faster than 'np.any' and 'np.all'.
510512
count_nonzero = np.count_nonzero
511513
indptr = weights.indptr.tolist()
512514
indices = weights.indices
513515
data = weights.data
514516
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
515517
mask = src_mask[indices[i0:i1]]
516-
if not count_nonzero(mask):
518+
n_masked = count_nonzero(mask)
519+
if not n_masked:
517520
continue
518521

519-
if mask.all():
522+
if n_masked == mask.size:
520523
dst_mask[j] = True
521524
continue
522525

@@ -528,8 +531,8 @@ def _regrid(
528531

529532
del indptr
530533

531-
elif method in ("linear", "bilinear", "nearest_dtos"):
532-
# 2) Linear and nearest neighbour methods:
534+
elif method in ("linear", "bilinear"):
535+
# 2) Linear methods:
533536
#
534537
# Mask out any row j that contains at least one positive
535538
# (i.e. greater than or equal to 'min_weight') w_ji that
@@ -545,7 +548,9 @@ def _regrid(
545548
# Note: It is much more efficient to access
546549
# 'weights.indptr', 'weights.indices', and
547550
# 'weights.data' directly, rather than iterating
548-
# over rows of 'weights' and using 'weights.getrow'.
551+
# over rows of 'weights' and using
552+
# 'weights.getrow'. Also, 'np.count_nonzero' is much
553+
# faster than 'np.any' and 'np.all'.
549554
count_nonzero = np.count_nonzero
550555
where = np.where
551556
indptr = weights.indptr.tolist()
@@ -561,12 +566,45 @@ def _regrid(
561566

562567
del indptr, pos_data
563568

569+
elif method == "nearest_dtos":
570+
# 3) Nearest neighbour dtos method:
571+
#
572+
# Set to 0 any weight that corresponds to a masked source
573+
# grid cell.
574+
#
575+
# Mask out any row j for which all source grid cells are
576+
# masked.
577+
dst_size = weights.shape[0]
578+
if dst_mask is None:
579+
dst_mask = np.zeros((dst_size,), dtype=bool)
580+
else:
581+
dst_mask = dst_mask.copy()
582+
583+
# Note: It is much more efficient to access
584+
# 'weights.indptr', 'weights.indices', and
585+
# 'weights.data' directly, rather than iterating
586+
# over rows of 'weights' and using
587+
# 'weights.getrow'. Also, 'np.count_nonzero' is much
588+
# faster than 'np.any' and 'np.all'.
589+
count_nonzero = np.count_nonzero
590+
indptr = weights.indptr.tolist()
591+
indices = weights.indices
592+
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
593+
mask = src_mask[indices[i0:i1]]
594+
n_masked = count_nonzero(mask)
595+
if n_masked == mask.size:
596+
dst_mask[j] = True
597+
elif n_masked:
598+
weights.data[np.arange(i0, i1)[mask]] = 0
599+
600+
del indptr
601+
564602
elif method in (
565603
"patch",
566604
"conservative_2nd",
567605
"nearest_stod",
568606
):
569-
# 3) Patch recovery and second-order conservative methods:
607+
# 4) Patch recovery and second-order conservative methods:
570608
#
571609
# A reference source data mask has already been
572610
# incorporated into the weights matrix, and 'a' is assumed

cf/dimensioncoordinate.py

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,11 @@
88
_inplace_enabled,
99
_inplace_enabled_define_and_cleanup,
1010
)
11-
from .functions import _DEPRECATION_ERROR_ATTRIBUTE, _DEPRECATION_ERROR_KWARGS
11+
from .functions import (
12+
_DEPRECATION_ERROR_ATTRIBUTE,
13+
_DEPRECATION_ERROR_KWARGS,
14+
bounds_combination_mode,
15+
)
1216
from .timeduration import TimeDuration
1317
from .units import Units
1418

@@ -246,6 +250,154 @@ def increasing(self):
246250
"""
247251
return self.direction()
248252

253+
@_inplace_enabled(default=False)
254+
def anchor(self, value, cell=False, parameters=None, inplace=False):
255+
"""Anchor the coordinate values.
256+
257+
By default, the coordinate values are transformed so that the
258+
first coordinate is the closest to *value* from above (below)
259+
for increasing (decreasing) coordinates.
260+
261+
If the *cell* parameter is True, then the coordinate values
262+
are transformed so that the first cell either contains
263+
*value*; or is the closest to cell to *value* from above
264+
(below) for increasing (decreasing) coordinates.
265+
266+
.. versionadded:: NEXTVERSION
267+
268+
.. seealso:: `period`, `roll`
269+
270+
:Parameters:
271+
272+
value: scalar array_like
273+
Anchor the coordinate values for the selected cyclic
274+
axis to the *value*. May be any numeric scalar object
275+
that can be converted to a `Data` object (which
276+
includes `numpy` and `Data` objects). If *value* has
277+
units then they must be compatible with those of the
278+
coordinates, otherwise it is assumed to have the same
279+
units as the coordinates.
280+
281+
The coordinate values are transformed so the first
282+
corodinate is the closest to *value* from above (for
283+
increasing coordinates), or the closest to *value* from
284+
above (for decreasing coordinates)
285+
286+
* Increasing coordinates with positive period, P,
287+
are transformed so that *value* lies in the
288+
half-open range (L-P, F], where F and L are the
289+
transformed first and last coordinate values,
290+
respectively.
291+
292+
..
293+
294+
* Decreasing coordinates with positive period, P,
295+
are transformed so that *value* lies in the
296+
half-open range (L+P, F], where F and L are the
297+
transformed first and last coordinate values,
298+
respectively.
299+
300+
*Parameter example:*
301+
If the original coordinates are ``0, 5, ..., 355``
302+
(evenly spaced) and the period is ``360`` then
303+
``value=0`` implies transformed coordinates of ``0,
304+
5, ..., 355``; ``value=-12`` implies transformed
305+
coordinates of ``-10, -5, ..., 345``; ``value=380``
306+
implies transformed coordinates of ``380, 385, ...,
307+
735``.
308+
309+
*Parameter example:*
310+
If the original coordinates are ``355, 350, ..., 0``
311+
(evenly spaced) and the period is ``360`` then
312+
``value=355`` implies transformed coordinates of
313+
``355, 350, ..., 0``; ``value=0`` implies
314+
transformed coordinates of ``0, -5, ..., -355``;
315+
``value=392`` implies transformed coordinates of
316+
``390, 385, ..., 35``.
317+
318+
cell: `bool`, optional
319+
If True, then the coordinate values are transformed so
320+
that the first cell either contains *value*, or is the
321+
closest to cell to *value* from above (below) for
322+
increasing (decreasing) coordinates.
323+
324+
If False (the default) then the coordinate values are
325+
transformed so that the first coordinate is the closest
326+
to *value* from above (below) for increasing
327+
(decreasing) coordinates.
328+
329+
parameters: `dict`, optional
330+
If a `dict` is provided then it will be updated
331+
in-place with parameters which describe thethe
332+
anchoring process.
333+
334+
{{inplace: `bool`, optional}}
335+
336+
:Returns:
337+
338+
`{{class}}` or `None`
339+
The anchored dimension coordinates, or `None` if the
340+
operation was in-place.
341+
342+
"""
343+
d = _inplace_enabled_define_and_cleanup(self)
344+
345+
period = d.period()
346+
if period is None:
347+
raise ValueError(f"Cyclic {d!r} has no period")
348+
349+
value = d._Data.asdata(value)
350+
if not value.Units:
351+
value = value.override_units(d.Units)
352+
elif not value.Units.equivalent(d.Units):
353+
raise ValueError(
354+
f"Anchor value has incompatible units: {value.Units!r}"
355+
)
356+
357+
if cell:
358+
c = d.upper_bounds.persist()
359+
else:
360+
d.persist(inplace=True)
361+
c = d.get_data(_fill_value=False)
362+
363+
if d.increasing:
364+
# Adjust value so it's in the range [c[0], c[0]+period)
365+
n = ((c[0] - value) / period).ceil()
366+
value1 = value + n * period
367+
shift = c.size - np.argmax((c - value1 >= 0).array)
368+
d.roll(0, shift, inplace=True)
369+
if cell:
370+
d0 = d[0].upper_bounds
371+
else:
372+
d0 = d.get_data(_fill_value=False)[0]
373+
374+
n = ((value - d0) / period).ceil()
375+
else:
376+
# Adjust value so it's in the range (c[0]-period, c[0]]
377+
n = ((c[0] - value) / period).floor()
378+
value1 = value + n * period
379+
shift = c.size - np.argmax((value1 - c >= 0).array)
380+
d.roll(0, shift, inplace=True)
381+
if cell:
382+
d0 = d[0].upper_bounds
383+
else:
384+
d0 = d.get_data(_fill_value=False)[0]
385+
386+
n = ((value - d0) / period).floor()
387+
388+
n.persist(inplace=True)
389+
if n:
390+
nperiod = n * period
391+
with bounds_combination_mode("OR"):
392+
d += nperiod
393+
else:
394+
nperiod = 0
395+
396+
if parameters is not None:
397+
parameters.update({"shift": shift, "nperiod": nperiod})
398+
399+
return d
400+
249401
def direction(self):
250402
"""Return True if the dimension coordinate values are
251403
increasing, otherwise return False.

cf/docstring/docstring.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,9 @@
161161
mapped to the closest destination point. A
162162
destination point can be mapped to multiple source
163163
points. Some destination points may not be
164-
mapped. Useful for regridding of categorical data.
164+
mapped. Each regridded value is the sum of its
165+
contributing source elements. Useful for binning or
166+
for categorical data.
165167
166168
* `None`: This is the default and can only be used
167169
when *dst* is a `RegridOperator`.""",

0 commit comments

Comments
 (0)