Skip to content

Commit a5aa1d6

Browse files
aulemahaldcherian
andauthored
Add basic geometry conversion (#260)
* Add basic geometry conversion points only - missing tests and example * Add tests, upd whats new and api * Apply suggestions from code review Co-authored-by: Deepak Cherian <[email protected]> * rename methods - format warnings * Add forgotten test file * Update doc/whats-new.rst Co-authored-by: Deepak Cherian <[email protected]> * Small fix to doc after external testing and review * add error testing and fix some shapely things Co-authored-by: Deepak Cherian <[email protected]>
1 parent 1fde526 commit a5aa1d6

File tree

8 files changed

+443
-1
lines changed

8 files changed

+443
-1
lines changed

cf_xarray/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .accessor import CFAccessor # noqa
2+
from .geometry import cf_to_shapely, shapely_to_cf # noqa
23
from .helpers import bounds_to_vertices, vertices_to_bounds # noqa
34
from .options import set_options # noqa
45
from .utils import _get_version

cf_xarray/geometry.py

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
from typing import Sequence, Union
2+
3+
import numpy as np
4+
import pandas as pd
5+
import xarray as xr
6+
7+
8+
def reshape_unique_geometries(
9+
ds: xr.Dataset,
10+
geom_var: str = "geometry",
11+
new_dim: str = "features",
12+
) -> xr.Dataset:
13+
"""Reshape a dataset containing a geometry variable so that all unique features are
14+
identified along a new dimension.
15+
16+
This function only makes sense if the dimension of the geometry variable has no coordinate,
17+
or if that coordinate has repeated values for each geometry.
18+
19+
Parameters
20+
----------
21+
ds : xr.Dataset
22+
A Dataset.
23+
geom_var : string
24+
Name of the variable in `ds` that contains the geometry objects of type shapely.geometry.
25+
The variable must be 1D.
26+
new_dim : string
27+
Name of the new dimension in the returned object.
28+
29+
Returns
30+
-------
31+
Dataset
32+
All variables sharing the dimension of `ds[geom_var]` are reshaped so that `new_dim`
33+
as a length equal to the number of unique geometries.
34+
"""
35+
if ds[geom_var].ndim > 1:
36+
raise ValueError(
37+
f"The geometry variable must be 1D. Got ds[{geom_var}] with dims {ds[geom_var].dims}."
38+
)
39+
40+
# Shapely objects are not hashable, thus np.unique cannot be used directly.
41+
# This trick is stolen from geopandas.
42+
_, unique_indexes, inv_indexes = np.unique(
43+
[g.wkb for g in ds[geom_var].values], return_index=True, return_inverse=True
44+
)
45+
old_name = ds[geom_var].dims[0]
46+
47+
if old_name in ds.coords:
48+
old_values = ds[old_name].values
49+
else:
50+
# A dummy coord, a kind of counter, independent for each unique geometries
51+
old_values = np.array(
52+
[(inv_indexes[:i] == ind).sum() for i, ind in enumerate(inv_indexes)]
53+
)
54+
55+
multi_index = pd.MultiIndex.from_arrays(
56+
(inv_indexes, old_values), names=(new_dim, old_name)
57+
)
58+
temp_name = "__temp_multi_index__"
59+
out = ds.rename({old_name: temp_name})
60+
out[temp_name] = multi_index
61+
out = out.unstack(temp_name)
62+
63+
# geom_var was reshaped also, reconstruct it from the unique values.
64+
unique_indexes = xr.DataArray(unique_indexes, dims=(new_dim,))
65+
out[geom_var] = ds[geom_var].isel({old_name: unique_indexes})
66+
if old_name not in ds.coords:
67+
# If there was no coord before, drop the dummy one we made.
68+
out = out.drop_vars(old_name)
69+
return out
70+
71+
72+
def shapely_to_cf(geometries: Union[xr.DataArray, Sequence], grid_mapping: str = None):
73+
"""Convert a DataArray with shapely geometry objects into a CF-compliant dataset.
74+
75+
.. warning::
76+
Only point geometries are currently implemented.
77+
78+
Parameters
79+
----------
80+
geometries : sequence of shapely geometries or xarray.DataArray
81+
A sequence of geometry objects or a Dataset with a "geometry" variable storing such geometries.
82+
All geometries must be of the same base type : Point, Line or Polygon, but multipart geometries are accepted.
83+
grid_mapping : str, optional
84+
A CF grid mapping name. When given, coordinates and attributes are named and set accordingly.
85+
Defaults to None, in which case the coordinates are simply names "crd_x" and "crd_y".
86+
87+
.. warning::
88+
Only the `longitude_latitude` grid mapping is currently implemented.
89+
90+
Returns
91+
-------
92+
xr.Dataset
93+
A dataset with shapely geometry objects translated into CF-compliant variables :
94+
- 'x', 'y' : the node coordinates
95+
- 'crd_x', 'crd_y' : the feature coordinates (might have different names if `grid_mapping` is available).
96+
- 'node_count' : The number of nodes per feature. Absent if all instances are Points.
97+
- 'geometry_container' : Empty variable with attributes describing the geometry type.
98+
- Other variables are not implemented as only Points are currently understood.
99+
"""
100+
# Get all types to call the appropriate translation function.
101+
types = {
102+
geom.item().geom_type if isinstance(geom, xr.DataArray) else geom.geom_type
103+
for geom in geometries
104+
}
105+
if types.issubset({"Point", "MultiPoint"}):
106+
ds = points_to_cf(geometries)
107+
elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset(
108+
{"LineString", "MultiLineString"}
109+
):
110+
raise NotImplementedError("Only point geometries conversion is implemented.")
111+
else:
112+
raise ValueError(
113+
f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}"
114+
)
115+
116+
# Special treatment of selected grid mappings
117+
if grid_mapping == "longitude_latitude":
118+
# Special case for longitude_latitude grid mapping
119+
ds = ds.rename(crd_x="lon", crd_y="lat")
120+
ds.lon.attrs.update(units="degrees_east", standard_name="longitude")
121+
ds.lat.attrs.update(units="degrees_north", standard_name="latitude")
122+
ds.geometry_container.attrs.update(coordinates="lon lat")
123+
ds.x.attrs.update(units="degrees_east", standard_name="longitude")
124+
ds.y.attrs.update(units="degrees_north", standard_name="latitude")
125+
elif grid_mapping is not None:
126+
raise NotImplementedError(
127+
f"Only grid mapping longitude_latitude is implemented. Got {grid_mapping}."
128+
)
129+
130+
return ds
131+
132+
133+
def cf_to_shapely(ds: xr.Dataset):
134+
"""Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable.
135+
136+
.. warning::
137+
Only point geometries are currently implemented.
138+
139+
Parameters
140+
----------
141+
ds : xr.Dataset
142+
Must contain a `geometry_container` variable with attributes giving the geometry specifications.
143+
Must contain all variables needed to reconstruct the geometries listed in these specifications.
144+
145+
Returns
146+
-------
147+
xr.DataArray
148+
A 1D DataArray of shapely objects.
149+
It has the same dimension as the `node_count` or the coordinates variables, or
150+
'features' if those were not present in `ds `.
151+
"""
152+
geom_type = ds.geometry_container.attrs["geometry_type"]
153+
if geom_type == "point":
154+
geometries = cf_to_points(ds)
155+
elif geom_type in ["line", "polygon"]:
156+
raise NotImplementedError("Only point geometries conversion is implemented.")
157+
else:
158+
raise ValueError(
159+
f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}"
160+
)
161+
162+
return geometries.rename("geometry")
163+
164+
165+
def points_to_cf(pts: Union[xr.DataArray, Sequence]):
166+
"""Get a list of points (shapely.geometry.[Multi]Point) and return a CF-compliant geometry dataset.
167+
168+
Parameters
169+
----------
170+
pts : sequence of shapely.geometry.Point or MultiPoint
171+
The sequence of [multi]points to translate to a CF dataset.
172+
173+
Returns
174+
-------
175+
xr.Dataset
176+
A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'.
177+
The coordinates of MultiPoint instances are their first point.
178+
"""
179+
if isinstance(pts, xr.DataArray):
180+
dim = pts.dims[0]
181+
coord = pts[dim] if dim in pts.coords else None
182+
pts = pts.values
183+
else:
184+
dim = "features"
185+
coord = None
186+
187+
x, y, node_count, crdX, crdY = [], [], [], [], []
188+
for pt in pts:
189+
xy = np.atleast_2d(np.array(pt))
190+
x.extend(xy[:, 0])
191+
y.extend(xy[:, 1])
192+
node_count.append(xy.shape[0])
193+
crdX.append(xy[0, 0])
194+
crdY.append(xy[0, 1])
195+
196+
ds = xr.Dataset(
197+
data_vars={
198+
"node_count": xr.DataArray(node_count, dims=(dim,)),
199+
"geometry_container": xr.DataArray(
200+
attrs={
201+
"geometry_type": "point",
202+
"node_count": "node_count",
203+
"node_coordinates": "x y",
204+
"coordinates": "crd_x crd_y",
205+
}
206+
),
207+
},
208+
coords={
209+
"x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}),
210+
"y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}),
211+
"crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}),
212+
"crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}),
213+
},
214+
)
215+
216+
if coord is not None:
217+
ds = ds.assign_coords({dim: coord})
218+
219+
# Special case when we have no MultiPoints
220+
if (ds.node_count == 1).all():
221+
ds = ds.drop_vars("node_count")
222+
del ds.geometry_container.attrs["node_count"]
223+
return ds
224+
225+
226+
def cf_to_points(ds: xr.Dataset):
227+
"""Convert point geometries stored in a CF-compliant way to shapely points stored in a single variable.
228+
229+
Parameters
230+
----------
231+
ds : xr.Dataset
232+
A dataset with CF-compliant point geometries.
233+
Must have a "geometry_container" variable with at least a 'node_coordinates' attribute.
234+
Must also have the two 1D variables listed by this attribute.
235+
236+
Returns
237+
-------
238+
geometry : xr.DataArray
239+
A 1D array of shapely.geometry.[Multi]Point objects.
240+
It has the same dimension as the `node_count` or the coordinates variables, or
241+
'features' if those were not present in `ds `.
242+
"""
243+
from shapely.geometry import MultiPoint, Point
244+
245+
# Shorthand for convenience
246+
geo = ds.geometry_container.attrs
247+
248+
# The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present.
249+
feat_dim = None
250+
if "coordinates" in geo and feat_dim is None:
251+
xcoord_name, _ = geo["coordinates"].split(" ")
252+
(feat_dim,) = ds[xcoord_name].dims
253+
254+
x_name, y_name = ds.geometry_container.attrs["node_coordinates"].split(" ")
255+
xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1)
256+
257+
node_count_name = ds.geometry_container.attrs.get("node_count")
258+
if node_count_name is None:
259+
# No node_count means all geometries are single points (node_count = 1)
260+
# And if we had no coordinates, then the dimension defaults to "features"
261+
feat_dim = feat_dim or "features"
262+
node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,))
263+
if feat_dim in ds.coords:
264+
node_count = node_count.assign_coords({feat_dim: ds[feat_dim]})
265+
else:
266+
node_count = ds[node_count_name]
267+
268+
j = 0 # The index of the first node.
269+
geoms = np.empty(node_count.shape, dtype=object)
270+
# i is the feature index, n its number of nodes
271+
for i, n in enumerate(node_count.values):
272+
if n == 1:
273+
geoms[i] = Point(xy[j, :])
274+
else:
275+
geoms[i] = MultiPoint(xy[j : j + n, :])
276+
j += n
277+
278+
return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords)

cf_xarray/tests/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,4 @@ def LooseVersion(vstring):
6464

6565

6666
has_pint, requires_pint = _importorskip("pint")
67+
has_shapely, requires_shapely = _importorskip("shapely")

0 commit comments

Comments
 (0)