Skip to content

Commit 20bdfdd

Browse files
aulemahal and dcherian authored
Fix add_bounds for heavily curved grids (#376)
* Fix add_bounds for heavy curved grids - fix no-index issues - adapt tests * Send bounds guessing to helpers in own functions - upd doc * added figures * Add test * Add mention of no-index dims in error Co-authored-by: Deepak Cherian <[email protected]>
1 parent ff740af commit 20bdfdd

File tree

7 files changed

+138
-36
lines changed

7 files changed

+138
-36
lines changed

cf_xarray/accessor.py

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
from xarray.core.weighted import Weighted
3131

3232
from .criteria import cf_role_criteria, coordinate_criteria, regex
33-
from .helpers import bounds_to_vertices
33+
from .helpers import _guess_bounds_1d, _guess_bounds_2d, bounds_to_vertices
3434
from .options import OPTIONS
3535
from .utils import (
3636
_get_version,
@@ -465,7 +465,7 @@ def wrapper(obj: DataArray | Dataset, key: str):
465465
}
466466

467467

468-
def _guess_bounds_dim(da, dim=None, out_dim="bounds"):
468+
def _guess_bounds(da, dim=None, out_dim="bounds"):
469469
"""
470470
Guess bounds values given a 1D or 2D coordinate variable.
471471
Assumes equal spacing on either side of the coordinate label.
@@ -477,43 +477,18 @@ def _guess_bounds_dim(da, dim=None, out_dim="bounds"):
477477
f"If dim is None, variable {da.name} must be 1D or 2D. Received {da.ndim}D variable instead."
478478
)
479479
dim = da.dims
480+
480481
if not isinstance(dim, str):
481482
if len(dim) > 2:
482483
raise NotImplementedError(
483484
"Adding bounds with more than 2 dimensions is not supported."
484485
)
485486
elif len(dim) == 2:
486-
daX = _guess_bounds_dim(da, dim[0]).rename(bounds="Xbnds")
487-
daXY = _guess_bounds_dim(daX, dim[1]).rename(bounds="Ybnds")
488-
return xr.concat(
489-
[
490-
daXY.isel(Xbnds=0, Ybnds=0),
491-
daXY.isel(Xbnds=0, Ybnds=1),
492-
daXY.isel(Xbnds=1, Ybnds=1),
493-
daXY.isel(Xbnds=1, Ybnds=0),
494-
],
495-
out_dim,
496-
).transpose(..., "bounds")
487+
return _guess_bounds_2d(da, dim).rename(bounds=out_dim)
497488
else:
498489
dim = dim[0]
499-
if dim not in da.dims:
500-
(dim,) = da.cf.axes[dim]
501-
if dim not in da.coords:
502-
raise NotImplementedError(
503-
"Adding bounds for unindexed dimensions is not supported currently."
504-
)
505-
506-
diff = da.diff(dim)
507-
lower = da - diff / 2
508-
upper = da + diff / 2
509-
bounds = xr.concat([lower, upper], dim=out_dim)
510490

511-
first = (bounds.isel({dim: 0}) - diff.isel({dim: 0})).assign_coords(
512-
{dim: da[dim][0]}
513-
)
514-
result = xr.concat([first, bounds], dim=dim).transpose(..., "bounds")
515-
516-
return result
491+
return _guess_bounds_1d(da, dim).rename(bounds=out_dim)
517492

518493

519494
def _build_docstring(func):
@@ -2252,15 +2227,17 @@ def add_bounds(
22522227

22532228
bad_vars: set[str] = variables - set(obj.variables)
22542229
if bad_vars:
2255-
raise ValueError(
2256-
f"{bad_vars!r} are not variables in the underlying object."
2257-
)
2230+
msg = f"{bad_vars!r} are not variables in the underlying object."
2231+
dims_no_idx = bad_vars.intersection(obj.dims)
2232+
if dims_no_idx:
2233+
msg += f" {dims_no_idx!r} are dimensions with no index."
2234+
raise ValueError(msg)
22582235

22592236
for var in variables:
22602237
bname = f"{var}_bounds"
22612238
if bname in obj.variables:
22622239
raise ValueError(f"Bounds variable name {bname!r} will conflict!")
2263-
out = _guess_bounds_dim(
2240+
out = _guess_bounds(
22642241
obj[var].reset_coords(drop=True), dim=dim, out_dim=output_dim
22652242
)
22662243
if output_dim in obj.dims and (new := out[output_dim].size) != (

cf_xarray/helpers.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,108 @@
77
from xarray import DataArray
88

99

10+
def _guess_bounds_1d(da, dim):
11+
"""
12+
Guess bounds values given a 1D coordinate variable.
13+
Assumes equal spacing on either side of the coordinate label.
14+
This is an approximation only.
15+
Output has an added "bounds" dimension at the end.
16+
"""
17+
if dim not in da.dims:
18+
(dim,) = da.cf.axes[dim]
19+
ADDED_INDEX = False
20+
if dim not in da.coords:
21+
# For proper alignment in the lines below, we need an index on dim.
22+
da = da.assign_coords({dim: da[dim]})
23+
ADDED_INDEX = True
24+
25+
diff = da.diff(dim)
26+
lower = da - diff / 2
27+
upper = da + diff / 2
28+
bounds = xr.concat([lower, upper], dim="bounds")
29+
30+
first = (bounds.isel({dim: 0}) - diff.isel({dim: 0})).assign_coords(
31+
{dim: da[dim][0]}
32+
)
33+
result = xr.concat([first, bounds], dim=dim).transpose(..., "bounds")
34+
if ADDED_INDEX:
35+
result = result.drop_vars(dim)
36+
return result
37+
38+
39+
def _guess_bounds_2d(da, dims):
40+
"""
41+
Guess bounds values given a 2D coordinate variable.
42+
Assumes equal spacing on either side of the coordinate label.
43+
This is a coarse approximation, especially for curvilinear grids.
44+
Output has an added "bounds" dimension at the end.
45+
"""
46+
daX = _guess_bounds_1d(da, dims[0]).rename(bounds="Xbnds")
47+
daXY = _guess_bounds_1d(daX, dims[1]).rename(bounds="Ybnds")
48+
# At this point, we might have different corners for adjacent cells, we average them together to have a nice grid
49+
# To make this vectorized and keep the edges, we'll pad with NaNs and ignore them in the averages
50+
daXYp = (
51+
daXY.pad({d: (1, 1) for d in dims}, mode="constant", constant_values=np.NaN)
52+
.transpose(*dims, "Xbnds", "Ybnds")
53+
.values
54+
) # Tranpose for an easier notation
55+
# Mean of the corners that should be the same point.
56+
daXYm = np.stack(
57+
(
58+
# Lower left corner (mean of : upper right of the lower left cell, lower right of the upper left cell, and so on, ccw)
59+
np.nanmean(
60+
np.stack(
61+
(
62+
daXYp[:-2, :-2, 1, 1],
63+
daXYp[:-2, 1:-1, 1, 0],
64+
daXYp[1:-1, 1:-1, 0, 0],
65+
daXYp[1:-1, :-2, 0, 1],
66+
)
67+
),
68+
axis=0,
69+
),
70+
# Upper left corner
71+
np.nanmean(
72+
np.stack(
73+
(
74+
daXYp[:-2, 1:-1, 1, 1],
75+
daXYp[:-2, 2:, 1, 0],
76+
daXYp[1:-1, 2:, 0, 0],
77+
daXYp[1:-1, 1:-1, 0, 1],
78+
)
79+
),
80+
axis=0,
81+
),
82+
# Upper right
83+
np.nanmean(
84+
np.stack(
85+
(
86+
daXYp[1:-1, 1:-1, 1, 1],
87+
daXYp[1:-1, 2:, 1, 0],
88+
daXYp[2:, 2:, 0, 0],
89+
daXYp[2:, 1:-1, 0, 1],
90+
)
91+
),
92+
axis=0,
93+
),
94+
# Lower right
95+
np.nanmean(
96+
np.stack(
97+
(
98+
daXYp[1:-1, :-2, 1, 1],
99+
daXYp[1:-1, 1:-1, 1, 0],
100+
daXYp[2:, 1:-1, 0, 0],
101+
daXYp[2:, :-2, 0, 1],
102+
)
103+
),
104+
axis=0,
105+
),
106+
),
107+
axis=-1,
108+
)
109+
return xr.DataArray(daXYm, dims=(*dims, "bounds"), coords=da.coords)
110+
111+
10112
def bounds_to_vertices(
11113
bounds: DataArray,
12114
bounds_dim: str,

cf_xarray/tests/test_accessor.py

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
pomds,
3030
popds,
3131
romsds,
32+
rotds,
3233
vert,
3334
)
3435
from . import raise_if_dask_computes, requires_cftime, requires_pint, requires_scipy
@@ -818,6 +819,18 @@ def test_add_bounds_nd_variable():
818819
actual = ds.cf.add_bounds("z").z_bounds.reset_coords(drop=True)
819820
xr.testing.assert_identical(actual, expected)
820821

822+
# 2D rotated ds
823+
lon_bounds = (
824+
rotds.drop_vars(["lon_bounds"])
825+
.assign(x=rotds["x"], y=rotds["y"])
826+
.cf.add_bounds(["lon"])
827+
.lon_bounds
828+
)
829+
# upper left of cell must be the EXACT same as the lower left of the cell above
830+
assert lon_bounds[0, 1, 1] == lon_bounds[0, 2, 0]
831+
# upper right of cell must be the EXACT same as the lower right of the cell above
832+
assert lon_bounds[0, 1, 2] == lon_bounds[0, 2, 3]
833+
821834
# 1D
822835
expected = (
823836
xr.concat([ds.z - 1.5, ds.z + 1.5], dim="bounds")
@@ -828,8 +841,9 @@ def test_add_bounds_nd_variable():
828841
actual = ds.cf.add_bounds("z", dim="x").z_bounds.reset_coords(drop=True)
829842
xr.testing.assert_identical(expected.transpose(..., "bounds"), actual)
830843

831-
with pytest.raises(NotImplementedError):
832-
ds.drop_vars("x").cf.add_bounds("z", dim="x")
844+
# Requesting bounds on a non-variable dimension
845+
with pytest.raises(ValueError, match="are dimensions with no index."):
846+
ds.drop_vars("x").cf.add_bounds("x")
833847

834848
with pytest.raises(ValueError, match="The `bounds` dimension already exists"):
835849
ds.cf.add_bounds("z").cf.add_bounds("x")

doc/2D_bounds_averaged.png

45.7 KB
Loading

doc/2D_bounds_nonunique.png

52 KB
Loading

doc/bounds.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,10 @@ See
1616
As an example, we present a "rotated pole" grid. It is defined on a rotated rectilinear grid which uses the `rlat` and `rlon` 1D coordinates, over North America at a resolution of 0.44°. The dataset comes with 2D `lat` and `lon` coordinates. `cf_xarray` will estimate the bounds by linear interpolation (extrapolation at the edges) of the existing `lon` and `lat`, which yields good results on parts of the grid where the rotation is small. However, the error is larger in other places, as seen when visualizing the distance in degrees between the estimated bounds and the true bounds.
1717

1818
![2d bounds error](2D_bounds_error.png)
19+
20+
For grids with a strong curvature between the cartesian axes and the lat/lon coordinates, the basic linear interpolation done for each point individually can yield grid cells with mismatched corners. The next figure shows such a case: the 4 corners within the red circle should all be the same point, but they are not. To circumvent this issue, `cf_xarray` averages these 4 different results together, as shown in the last figure.
21+
22+
![2d bounds unmatching corners](2D_bounds_nonunique.png)
23+
![2d bounds averaged corners](2D_bounds_averaged.png)
24+
25+
This last example illustrates again that `cf_xarray` can only estimate the grid bounds; grid metrics provided by the data producer will always be better.

doc/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ What's New
66
v0.7.6 (unreleased)
77
===================
88

9+
- Fix to ``cf.add_bounds`` to support all types of curved grids (:pr:`376`).
10+
By `Pascal Bourgault`_
911
- Allow custom criteria to match the variable name of DataArray objects (:pr:`379`). By `Mathias Hauser`_
1012

1113
v0.7.5 (Nov 15, 2022)

0 commit comments

Comments
 (0)