Skip to content

Commit 19e1574

Browse files
authored
Merge pull request #156 from csiro-coasts/test-valid-polygons
Test polygons are valid after constructing them
2 parents 4abcb69 + 0bcc612 commit 19e1574

File tree

8 files changed

+46
-26
lines changed

8 files changed

+46
-26
lines changed

docs/releases/development.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,8 @@ Next release (in development)
66
in CFGrid2D datasets with no cell bounds (:pr:`154`).
77
* Improved speed of triangulation for convex polygons
88
(:pr:`151`).
9+
* Check all polygons in a dataset are valid as part of generating them.
10+
This will slow down opening new datasets slightly,
11+
but the trade off is worth the added security
12+
after the invalid polygons found in :pr:`154`
13+
(:pr:`156`).

src/emsarray/conventions/_base.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88
from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar, cast
99

1010
import numpy
11+
import shapely
1112
import xarray
12-
from shapely import unary_union
1313
from shapely.geometry import MultiPolygon, Point, Polygon
1414
from shapely.geometry.base import BaseGeometry
1515
from shapely.strtree import STRtree
1616

1717
from emsarray import utils
1818
from emsarray.compat.shapely import SpatialIndex
19-
from emsarray.exceptions import NoSuchCoordinateError
19+
from emsarray.exceptions import InvalidPolygonWarning, NoSuchCoordinateError
2020
from emsarray.operations import depth, point_extraction
2121
from emsarray.plot import (
2222
_requires_plot, animate_on_figure, make_plot_title, plot_on_figure,
@@ -1264,8 +1264,8 @@ def make_quiver(
12641264

12651265
return Quiver(axes, x, y, *values, **kwargs)
12661266

1267-
@property
1268-
@abc.abstractmethod
1267+
@cached_property
1268+
@utils.timed_func
12691269
def polygons(self) -> numpy.ndarray:
12701270
"""A :class:`numpy.ndarray` of :class:`shapely.Polygon` instances
12711271
representing the cells in this dataset.
@@ -1286,6 +1286,24 @@ def polygons(self) -> numpy.ndarray:
12861286
:meth:`ravel_index`
12871287
:attr:`mask`
12881288
"""
1289+
polygons = self._make_polygons()
1290+
1291+
not_none = (polygons != None) # noqa: E711
1292+
invalid_polygon_indices = numpy.flatnonzero(not_none & ~shapely.is_valid(polygons))
1293+
if len(invalid_polygon_indices):
1294+
indices_str = numpy.array2string(
1295+
invalid_polygon_indices, max_line_width=None, threshold=5)
1296+
warnings.warn(
1297+
f"Dropping invalid polygons at indices {indices_str}",
1298+
category=InvalidPolygonWarning)
1299+
polygons[invalid_polygon_indices] = None
1300+
not_none[invalid_polygon_indices] = False
1301+
1302+
polygons.flags.writeable = False
1303+
return polygons
1304+
1305+
@abc.abstractmethod
1306+
def _make_polygons(self) -> numpy.ndarray:
12891307
pass
12901308

12911309
@cached_property
@@ -1337,7 +1355,7 @@ def geometry(self) -> Polygon | MultiPolygon:
13371355
This is equivalent to the union of all polygons in the dataset,
13381356
although specific conventions may have a simpler way of constructing this.
13391357
"""
1340-
return unary_union(self.polygons[self.mask])
1358+
return shapely.unary_union(self.polygons[self.mask])
13411359

13421360
@cached_property
13431361
def bounds(self) -> Bounds:

src/emsarray/conventions/arakawa_c.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,7 @@ def unpack_index(self, index: ArakawaCIndex) -> tuple[ArakawaCGridKind, Sequence
260260
def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> ArakawaCIndex:
261261
return cast(ArakawaCIndex, (grid_kind, *indexes))
262262

263-
@cached_property
264-
@utils.timed_func
265-
def polygons(self) -> numpy.ndarray:
263+
def _make_polygons(self) -> numpy.ndarray:
266264
# Make an array of shape (j, i, 2) of all the nodes
267265
grid = numpy.stack([self.node.longitude.values, self.node.latitude.values], axis=-1)
268266

src/emsarray/conventions/grid.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -408,9 +408,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
408408

409409
return Specificity.LOW
410410

411-
@cached_property
412-
@utils.timed_func
413-
def polygons(self) -> numpy.ndarray:
411+
def _make_polygons(self) -> numpy.ndarray:
414412
lon_bounds = self.topology.longitude_bounds.values
415413
lat_bounds = self.topology.latitude_bounds.values
416414

@@ -576,9 +574,7 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
576574

577575
return Specificity.LOW
578576

579-
@cached_property
580-
@utils.timed_func
581-
def polygons(self) -> numpy.ndarray:
577+
def _make_polygons(self) -> numpy.ndarray:
582578
# Construct polygons from the bounds of the cells
583579
lon_bounds = self.topology.longitude_bounds.values
584580
lat_bounds = self.topology.latitude_bounds.values

src/emsarray/conventions/ugrid.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,9 +1076,7 @@ def grid_kinds(self) -> frozenset[UGridKind]:
10761076
items.append(UGridKind.edge)
10771077
return frozenset(items)
10781078

1079-
@cached_property
1080-
@utils.timed_func
1081-
def polygons(self) -> numpy.ndarray:
1079+
def _make_polygons(self) -> numpy.ndarray:
10821080
"""Generate list of Polygons"""
10831081
# X,Y coords of each node
10841082
topology = self.topology

src/emsarray/exceptions.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,19 @@ class EmsarrayError(Exception):
1010
"""
1111

1212

13+
class EmsarrayWarning(Warning):
14+
"""
15+
Base class for all emsarray-specific warning classes.
16+
"""
17+
18+
1319
class ConventionViolationError(EmsarrayError):
1420
"""
1521
A dataset violates its conventions in a way that is not recoverable.
1622
"""
1723

1824

19-
class ConventionViolationWarning(UserWarning):
25+
class ConventionViolationWarning(EmsarrayWarning):
2026
"""
2127
A dataset violates its conventions in a way that we can handle.
2228
For example, an attribute has an invalid type,
@@ -30,3 +36,9 @@ class NoSuchCoordinateError(KeyError, EmsarrayError):
3036
such as in :attr:`.Convention.time_coordinate` and
3137
:attr:`.Convention.depth_coordinate`.
3238
"""
39+
40+
41+
class InvalidPolygonWarning(EmsarrayWarning):
42+
"""
43+
A polygon in a dataset was invalid or not simple.
44+
"""

src/emsarray/operations/triangulate.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,9 +147,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex]
147147
:func:`triangulate_dataset`,
148148
`Polygon triangulation <https://en.wikipedia.org/wiki/Polygon_triangulation>`_
149149
"""
150-
if not polygon.is_simple:
151-
raise ValueError("_triangulate_polygon only supports simple polygons")
152-
153150
# The 'ear clipping' method used below is correct for all polygons, but not
154151
# performant. If the polygon is convex we can use a shortcut method.
155152
if polygon.equals(polygon.convex_hull):
@@ -174,9 +171,6 @@ def _triangulate_polygon(polygon: Polygon) -> list[tuple[Vertex, Vertex, Vertex]
174171
# Most polygons will be either squares, convex quadrilaterals, or convex
175172
# polygons.
176173

177-
# Maintain a consistent winding order
178-
polygon = polygon.normalize()
179-
180174
triangles: list[tuple[Vertex, Vertex, Vertex]] = []
181175
# Note that shapely polygons with n vertices will be closed, and thus have
182176
# n+1 coordinates. We trim that superfluous coordinate off in the next line

tests/conventions/test_base.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,7 @@ def wind(
118118
def drop_geometry(self) -> xarray.Dataset:
119119
return self.dataset
120120

121-
@cached_property
122-
def polygons(self) -> numpy.ndarray:
121+
def _make_polygons(self) -> numpy.ndarray:
123122
height, width = self.shape
124123
# Each polygon is a box from (x, y, x+1, y+1),
125124
# however the polygons around the edge are masked out with None.

0 commit comments

Comments
 (0)