Skip to content

Commit 0bcc612

Browse files
committed
Check dataset polygons are valid when constructing them
`Convention.polygons` is now defined on the base `Convention` class. It calls a new abstract method `Convention._make_polygons()`. This allows us to do some post processing of the polygons as part of generating them. Invalid polygons generate a warning and are dropped as part of generation. Non-simple polygons generate a warning but are kept. This means the triangulation code doesn't need to perform this step. It doesn't save any time overall, as the polygons still need to be tested either way.
1 parent 4abcb69 commit 0bcc612

File tree

8 files changed

+46
-26
lines changed

8 files changed

+46
-26
lines changed

docs/releases/development.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,8 @@ Next release (in development)
66
in CFGrid2D datasets with no cell bounds (:pr:`154`).
77
* Improved speed of triangulation for convex polygons
88
(:pr:`151`).
9+
* Check all polygons in a dataset are valid as part of generating them.
10+
This will slow down opening new datasets slightly,
11+
but the trade off is worth the added security
12+
after the invalid polygons found in :pr:`154`
13+
(:pr:`156`).

src/emsarray/conventions/_base.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88
from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast
99

1010
import numpy
11+
import shapely
1112
import xarray
12-
from shapely import unary_union
1313
from shapely.geometry import MultiPolygon, Point, Polygon
1414
from shapely.geometry.base import BaseGeometry
1515
from shapely.strtree import STRtree
1616

1717
from emsarray import utils
1818
from emsarray.compat.shapely import SpatialIndex
19-
from emsarray.exceptions import NoSuchCoordinateError
19+
from emsarray.exceptions import InvalidPolygonWarning, NoSuchCoordinateError
2020
from emsarray.operations import depth, point_extraction
2121
from emsarray.plot import (
2222
_requires_plot, animate_on_figure, make_plot_title, plot_on_figure,
@@ -1264,8 +1264,8 @@ def make_quiver(
12641264

12651265
return Quiver(axes, x, y, *values, **kwargs)
12661266

1267-
@property
1268-
@abc.abstractmethod
1267+
@cached_property
1268+
@utils.timed_func
12691269
def polygons(self) -> numpy.ndarray:
12701270
"""A :class:`numpy.ndarray` of :class:`shapely.Polygon` instances
12711271
representing the cells in this dataset.
@@ -1286,6 +1286,24 @@ def polygons(self) -> numpy.ndarray:
12861286
:meth:`ravel_index`
12871287
:attr:`mask`
12881288
"""
1289+
polygons = self._make_polygons()
1290+
1291+
not_none = (polygons != None) # noqa: E711
1292+
invalid_polygon_indices = numpy.flatnonzero(not_none & ~shapely.is_valid(polygons))
1293+
if len(invalid_polygon_indices):
1294+
indices_str = numpy.array2string(
1295+
invalid_polygon_indices, max_line_width=None, threshold=5)
1296+
warnings.warn(
1297+
f"Dropping invalid polygons at indices {indices_str}",
1298+
category=InvalidPolygonWarning)
1299+
polygons[invalid_polygon_indices] = None
1300+
not_none[invalid_polygon_indices] = False
1301+
1302+
polygons.flags.writeable = False
1303+
return polygons
1304+
1305+
@abc.abstractmethod
1306+
def _make_polygons(self) -> numpy.ndarray:
12891307
pass
12901308

12911309
@cached_property
@@ -1337,7 +1355,7 @@ def geometry(self) -> Polygon | MultiPolygon:
13371355
This is equivalent to the union of all polygons in the dataset,
13381356
although specific conventions may have a simpler way of constructing this.
13391357
"""
1340-
return unary_union(self.polygons[self.mask])
1358+
return shapely.unary_union(self.polygons[self.mask])
13411359

13421360
@cached_property
13431361
def bounds(self) -> Bounds:

src/emsarray/conventions/arakawa_c.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,7 @@ def unpack_index(self, index: ArakawaCIndex) -> tuple[ArakawaCGridKind, Sequence
260260
def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> ArakawaCIndex:
261261
return cast(ArakawaCIndex, (grid_kind, *indexes))
262262

263-
@cached_property
264-
@utils.timed_func
265-
def polygons(self) -> numpy.ndarray:
263+
def _make_polygons(self) -> numpy.ndarray:
266264
# Make an array of shape (j, i, 2) of all the nodes
267265
grid = numpy.stack([self.node.longitude.values, self.node.latitude.values], axis=-1)
268266

src/emsarray/conventions/grid.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
408408

409409
return Specificity.LOW
410410

411-
@cached_property
412-
@utils.timed_func
413-
def polygons(self) -> numpy.ndarray:
411+
def _make_polygons(self) -> numpy.ndarray:
414412
lon_bounds = self.topology.longitude_bounds.values
415413
lat_bounds = self.topology.latitude_bounds.values
416414

@@ -576,9 +574,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
576574

577575
return Specificity.LOW
578576

579-
@cached_property
580-
@utils.timed_func
581-
def polygons(self) -> numpy.ndarray:
577+
def _make_polygons(self) -> numpy.ndarray:
582578
# Construct polygons from the bounds of the cells
583579
lon_bounds = self.topology.longitude_bounds.values
584580
lat_bounds = self.topology.latitude_bounds.values

src/emsarray/conventions/ugrid.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,9 +1076,7 @@ def grid_kinds(self) -> frozenset[UGridKind]:
10761076
items.append(UGridKind.edge)
10771077
return frozenset(items)
10781078

1079-
@cached_property
1080-
@utils.timed_func
1081-
def polygons(self) -> numpy.ndarray:
1079+
def _make_polygons(self) -> numpy.ndarray:
10821080
"""Generate list of Polygons"""
10831081
# X,Y coords of each node
10841082
topology = self.topology

src/emsarray/exceptions.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@ class EmsarrayError(Exception):
1010
"""
1111

1212

13+
class EmsarrayWarning(Warning):
14+
"""
15+
Base class for all emsarray-specific warning classes.
16+
"""
17+
18+
1319
class ConventionViolationError(EmsarrayError):
1420
"""
1521
A dataset violates its conventions in a way that is not recoverable.
1622
"""
1723

1824

19-
class ConventionViolationWarning(UserWarning):
25+
class ConventionViolationWarning(EmsarrayWarning):
2026
"""
2127
A dataset violates its conventions in a way that we can handle.
2228
For example, an attribute has an invalid type,
@@ -30,3 +36,9 @@ class NoSuchCoordinateError(KeyError, EmsarrayError):
3036
such as in :attr:`.Convention.time_coordinate` and
3137
:attr:`.Convention.depth_coordinate`.
3238
"""
39+
40+
41+
class InvalidPolygonWarning(EmsarrayWarning):
42+
"""
43+
A polygon in a dataset was invalid or not simple.
44+
"""

src/emsarray/operations/triangulate.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex]
147147
:func:`triangulate_dataset`,
148148
`Polygon triangulation <https://en.wikipedia.org/wiki/Polygon_triangulation>`_
149149
"""
150-
if not polygon.is_simple:
151-
raise ValueError("_triangulate_polygon only supports simple polygons")
152-
153150
# The 'ear clipping' method used below is correct for all polygons, but not
154151
# performant. If the polygon is convex we can use a shortcut method.
155152
if polygon.equals(polygon.convex_hull):
@@ -174,9 +171,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex]
174171
# Most polygons will be either squares, convex quadrilaterals, or convex
175172
# polygons.
176173

177-
# Maintain a consistent winding order
178-
polygon = polygon.normalize()
179-
180174
triangles: list[tuple[Vertex, Vertex, Vertex]] = []
181175
# Note that shapely polygons with n vertices will be closed, and thus have
182176
# n+1 coordinates. We trim that superfluous coordinate off in the next line

tests/conventions/test_base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,7 @@ def wind(
118118
def drop_geometry(self) -> xarray.Dataset:
119119
return self.dataset
120120

121-
@cached_property
122-
def polygons(self) -> numpy.ndarray:
121+
def _make_polygons(self) -> numpy.ndarray:
123122
height, width = self.shape
124123
# Each polygon is a box from (x, y, x+1, y+1),
125124
# however the polygons around the edge are masked out with None.

0 commit comments

Comments
 (0)