|
| 1 | +from typing import Sequence, Union |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | +import xarray as xr |
| 6 | + |
| 7 | + |
| 8 | +def reshape_unique_geometries( |
| 9 | + ds: xr.Dataset, |
| 10 | + geom_var: str = "geometry", |
| 11 | + new_dim: str = "features", |
| 12 | +) -> xr.Dataset: |
| 13 | + """Reshape a dataset containing a geometry variable so that all unique features are |
| 14 | + identified along a new dimension. |
| 15 | +
|
| 16 | + This function only makes sense if the dimension of the geometry variable has no coordinate, |
| 17 | + or if that coordinate has repeated values for each geometry. |
| 18 | +
|
| 19 | + Parameters |
| 20 | + ---------- |
| 21 | + ds : xr.Dataset |
| 22 | + A Dataset. |
| 23 | + geom_var : string |
| 24 | + Name of the variable in `ds` that contains the geometry objects of type shapely.geometry. |
| 25 | + The variable must be 1D. |
| 26 | + new_dim : string |
| 27 | + Name of the new dimension in the returned object. |
| 28 | +
|
| 29 | + Returns |
| 30 | + ------- |
| 31 | + Dataset |
| 32 | + All variables sharing the dimension of `ds[geom_var]` are reshaped so that `new_dim` |
| 33 | + as a length equal to the number of unique geometries. |
| 34 | + """ |
| 35 | + if ds[geom_var].ndim > 1: |
| 36 | + raise ValueError( |
| 37 | + f"The geometry variable must be 1D. Got ds[{geom_var}] with dims {ds[geom_var].dims}." |
| 38 | + ) |
| 39 | + |
| 40 | + # Shapely objects are not hashable, thus np.unique cannot be used directly. |
| 41 | + # This trick is stolen from geopandas. |
| 42 | + _, unique_indexes, inv_indexes = np.unique( |
| 43 | + [g.wkb for g in ds[geom_var].values], return_index=True, return_inverse=True |
| 44 | + ) |
| 45 | + old_name = ds[geom_var].dims[0] |
| 46 | + |
| 47 | + if old_name in ds.coords: |
| 48 | + old_values = ds[old_name].values |
| 49 | + else: |
| 50 | + # A dummy coord, a kind of counter, independent for each unique geometries |
| 51 | + old_values = np.array( |
| 52 | + [(inv_indexes[:i] == ind).sum() for i, ind in enumerate(inv_indexes)] |
| 53 | + ) |
| 54 | + |
| 55 | + multi_index = pd.MultiIndex.from_arrays( |
| 56 | + (inv_indexes, old_values), names=(new_dim, old_name) |
| 57 | + ) |
| 58 | + temp_name = "__temp_multi_index__" |
| 59 | + out = ds.rename({old_name: temp_name}) |
| 60 | + out[temp_name] = multi_index |
| 61 | + out = out.unstack(temp_name) |
| 62 | + |
| 63 | + # geom_var was reshaped also, reconstruct it from the unique values. |
| 64 | + unique_indexes = xr.DataArray(unique_indexes, dims=(new_dim,)) |
| 65 | + out[geom_var] = ds[geom_var].isel({old_name: unique_indexes}) |
| 66 | + if old_name not in ds.coords: |
| 67 | + # If there was no coord before, drop the dummy one we made. |
| 68 | + out = out.drop_vars(old_name) |
| 69 | + return out |
| 70 | + |
| 71 | + |
| 72 | +def shapely_to_cf(geometries: Union[xr.DataArray, Sequence], grid_mapping: str = None): |
| 73 | + """Convert a DataArray with shapely geometry objects into a CF-compliant dataset. |
| 74 | +
|
| 75 | + .. warning:: |
| 76 | + Only point geometries are currently implemented. |
| 77 | +
|
| 78 | + Parameters |
| 79 | + ---------- |
| 80 | + geometries : sequence of shapely geometries or xarray.DataArray |
| 81 | + A sequence of geometry objects or a Dataset with a "geometry" variable storing such geometries. |
| 82 | + All geometries must be of the same base type : Point, Line or Polygon, but multipart geometries are accepted. |
| 83 | + grid_mapping : str, optional |
| 84 | + A CF grid mapping name. When given, coordinates and attributes are named and set accordingly. |
| 85 | + Defaults to None, in which case the coordinates are simply names "crd_x" and "crd_y". |
| 86 | +
|
| 87 | + .. warning:: |
| 88 | + Only the `longitude_latitude` grid mapping is currently implemented. |
| 89 | +
|
| 90 | + Returns |
| 91 | + ------- |
| 92 | + xr.Dataset |
| 93 | + A dataset with shapely geometry objects translated into CF-compliant variables : |
| 94 | + - 'x', 'y' : the node coordinates |
| 95 | + - 'crd_x', 'crd_y' : the feature coordinates (might have different names if `grid_mapping` is available). |
| 96 | + - 'node_count' : The number of nodes per feature. Absent if all instances are Points. |
| 97 | + - 'geometry_container' : Empty variable with attributes describing the geometry type. |
| 98 | + - Other variables are not implemented as only Points are currently understood. |
| 99 | + """ |
| 100 | + # Get all types to call the appropriate translation function. |
| 101 | + types = { |
| 102 | + geom.item().geom_type if isinstance(geom, xr.DataArray) else geom.geom_type |
| 103 | + for geom in geometries |
| 104 | + } |
| 105 | + if types.issubset({"Point", "MultiPoint"}): |
| 106 | + ds = points_to_cf(geometries) |
| 107 | + elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset( |
| 108 | + {"LineString", "MultiLineString"} |
| 109 | + ): |
| 110 | + raise NotImplementedError("Only point geometries conversion is implemented.") |
| 111 | + else: |
| 112 | + raise ValueError( |
| 113 | + f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}" |
| 114 | + ) |
| 115 | + |
| 116 | + # Special treatment of selected grid mappings |
| 117 | + if grid_mapping == "longitude_latitude": |
| 118 | + # Special case for longitude_latitude grid mapping |
| 119 | + ds = ds.rename(crd_x="lon", crd_y="lat") |
| 120 | + ds.lon.attrs.update(units="degrees_east", standard_name="longitude") |
| 121 | + ds.lat.attrs.update(units="degrees_north", standard_name="latitude") |
| 122 | + ds.geometry_container.attrs.update(coordinates="lon lat") |
| 123 | + ds.x.attrs.update(units="degrees_east", standard_name="longitude") |
| 124 | + ds.y.attrs.update(units="degrees_north", standard_name="latitude") |
| 125 | + elif grid_mapping is not None: |
| 126 | + raise NotImplementedError( |
| 127 | + f"Only grid mapping longitude_latitude is implemented. Got {grid_mapping}." |
| 128 | + ) |
| 129 | + |
| 130 | + return ds |
| 131 | + |
| 132 | + |
| 133 | +def cf_to_shapely(ds: xr.Dataset): |
| 134 | + """Convert geometries stored in a CF-compliant way to shapely objects stored in a single variable. |
| 135 | +
|
| 136 | + .. warning:: |
| 137 | + Only point geometries are currently implemented. |
| 138 | +
|
| 139 | + Parameters |
| 140 | + ---------- |
| 141 | + ds : xr.Dataset |
| 142 | + Must contain a `geometry_container` variable with attributes giving the geometry specifications. |
| 143 | + Must contain all variables needed to reconstruct the geometries listed in these specifications. |
| 144 | +
|
| 145 | + Returns |
| 146 | + ------- |
| 147 | + xr.DataArray |
| 148 | + A 1D DataArray of shapely objects. |
| 149 | + It has the same dimension as the `node_count` or the coordinates variables, or |
| 150 | + 'features' if those were not present in `ds `. |
| 151 | + """ |
| 152 | + geom_type = ds.geometry_container.attrs["geometry_type"] |
| 153 | + if geom_type == "point": |
| 154 | + geometries = cf_to_points(ds) |
| 155 | + elif geom_type in ["line", "polygon"]: |
| 156 | + raise NotImplementedError("Only point geometries conversion is implemented.") |
| 157 | + else: |
| 158 | + raise ValueError( |
| 159 | + f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" |
| 160 | + ) |
| 161 | + |
| 162 | + return geometries.rename("geometry") |
| 163 | + |
| 164 | + |
| 165 | +def points_to_cf(pts: Union[xr.DataArray, Sequence]): |
| 166 | + """Get a list of points (shapely.geometry.[Multi]Point) and return a CF-compliant geometry dataset. |
| 167 | +
|
| 168 | + Parameters |
| 169 | + ---------- |
| 170 | + pts : sequence of shapely.geometry.Point or MultiPoint |
| 171 | + The sequence of [multi]points to translate to a CF dataset. |
| 172 | +
|
| 173 | + Returns |
| 174 | + ------- |
| 175 | + xr.Dataset |
| 176 | + A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container'. |
| 177 | + The coordinates of MultiPoint instances are their first point. |
| 178 | + """ |
| 179 | + if isinstance(pts, xr.DataArray): |
| 180 | + dim = pts.dims[0] |
| 181 | + coord = pts[dim] if dim in pts.coords else None |
| 182 | + pts = pts.values |
| 183 | + else: |
| 184 | + dim = "features" |
| 185 | + coord = None |
| 186 | + |
| 187 | + x, y, node_count, crdX, crdY = [], [], [], [], [] |
| 188 | + for pt in pts: |
| 189 | + xy = np.atleast_2d(np.array(pt)) |
| 190 | + x.extend(xy[:, 0]) |
| 191 | + y.extend(xy[:, 1]) |
| 192 | + node_count.append(xy.shape[0]) |
| 193 | + crdX.append(xy[0, 0]) |
| 194 | + crdY.append(xy[0, 1]) |
| 195 | + |
| 196 | + ds = xr.Dataset( |
| 197 | + data_vars={ |
| 198 | + "node_count": xr.DataArray(node_count, dims=(dim,)), |
| 199 | + "geometry_container": xr.DataArray( |
| 200 | + attrs={ |
| 201 | + "geometry_type": "point", |
| 202 | + "node_count": "node_count", |
| 203 | + "node_coordinates": "x y", |
| 204 | + "coordinates": "crd_x crd_y", |
| 205 | + } |
| 206 | + ), |
| 207 | + }, |
| 208 | + coords={ |
| 209 | + "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), |
| 210 | + "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), |
| 211 | + "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), |
| 212 | + "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), |
| 213 | + }, |
| 214 | + ) |
| 215 | + |
| 216 | + if coord is not None: |
| 217 | + ds = ds.assign_coords({dim: coord}) |
| 218 | + |
| 219 | + # Special case when we have no MultiPoints |
| 220 | + if (ds.node_count == 1).all(): |
| 221 | + ds = ds.drop_vars("node_count") |
| 222 | + del ds.geometry_container.attrs["node_count"] |
| 223 | + return ds |
| 224 | + |
| 225 | + |
| 226 | +def cf_to_points(ds: xr.Dataset): |
| 227 | + """Convert point geometries stored in a CF-compliant way to shapely points stored in a single variable. |
| 228 | +
|
| 229 | + Parameters |
| 230 | + ---------- |
| 231 | + ds : xr.Dataset |
| 232 | + A dataset with CF-compliant point geometries. |
| 233 | + Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. |
| 234 | + Must also have the two 1D variables listed by this attribute. |
| 235 | +
|
| 236 | + Returns |
| 237 | + ------- |
| 238 | + geometry : xr.DataArray |
| 239 | + A 1D array of shapely.geometry.[Multi]Point objects. |
| 240 | + It has the same dimension as the `node_count` or the coordinates variables, or |
| 241 | + 'features' if those were not present in `ds `. |
| 242 | + """ |
| 243 | + from shapely.geometry import MultiPoint, Point |
| 244 | + |
| 245 | + # Shorthand for convenience |
| 246 | + geo = ds.geometry_container.attrs |
| 247 | + |
| 248 | + # The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present. |
| 249 | + feat_dim = None |
| 250 | + if "coordinates" in geo and feat_dim is None: |
| 251 | + xcoord_name, _ = geo["coordinates"].split(" ") |
| 252 | + (feat_dim,) = ds[xcoord_name].dims |
| 253 | + |
| 254 | + x_name, y_name = ds.geometry_container.attrs["node_coordinates"].split(" ") |
| 255 | + xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) |
| 256 | + |
| 257 | + node_count_name = ds.geometry_container.attrs.get("node_count") |
| 258 | + if node_count_name is None: |
| 259 | + # No node_count means all geometries are single points (node_count = 1) |
| 260 | + # And if we had no coordinates, then the dimension defaults to "features" |
| 261 | + feat_dim = feat_dim or "features" |
| 262 | + node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) |
| 263 | + if feat_dim in ds.coords: |
| 264 | + node_count = node_count.assign_coords({feat_dim: ds[feat_dim]}) |
| 265 | + else: |
| 266 | + node_count = ds[node_count_name] |
| 267 | + |
| 268 | + j = 0 # The index of the first node. |
| 269 | + geoms = np.empty(node_count.shape, dtype=object) |
| 270 | + # i is the feature index, n its number of nodes |
| 271 | + for i, n in enumerate(node_count.values): |
| 272 | + if n == 1: |
| 273 | + geoms[i] = Point(xy[j, :]) |
| 274 | + else: |
| 275 | + geoms[i] = MultiPoint(xy[j : j + n, :]) |
| 276 | + j += n |
| 277 | + |
| 278 | + return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) |
0 commit comments