Skip to content

Commit 60be9ce

Browse files
quentinblampeypre-commit-ci[bot]LucaMarconato
authored
Account for the padding shift during vectorization + use multipolygon (#921)
* account for the padding shift during vectorization + use multipolygon * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * use correct typing MultiPolygon | Polygon + assert polygons * stricter check _region_props_to_polygon() --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Luca Marconato <[email protected]>
1 parent 23c2b72 commit 60be9ce

File tree

2 files changed

+51
-15
lines changed

2 files changed

+51
-15
lines changed

src/spatialdata/_core/operations/vectorize.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -227,30 +227,34 @@ def _vectorize_chunk(chunk: np.ndarray, yoff: int, xoff: int) -> GeoDataFrame:
227227
return ShapesModel.parse(gdf, transformations=transformations.copy())
228228

229229

230-
def _region_props_to_polygons(region_props: RegionProperties) -> list[Polygon]:
230+
def _region_props_to_polygon(region_props: RegionProperties) -> MultiPolygon | Polygon:
231231
mask = np.pad(region_props.image, 1)
232232
contours = skimage.measure.find_contours(mask, 0.5)
233233

234-
# shapes with <= 3 vertices, i.e. lines, can't be converted into a polygon
235-
polygons = [Polygon(contour[:, [1, 0]]) for contour in contours if contour.shape[0] >= 4]
234+
invalid_polygons = any(contour.shape[0] <= 3 for contour in contours)
235+
if invalid_polygons:
236+
# this should not happen because even a single pixel should can be converted to a polygon
237+
raise RuntimeError("Invalid polygon found in the region properties.")
238+
polygons = [Polygon(contour[:, [1, 0]]) for contour in contours]
239+
240+
polygon = MultiPolygon(polygons) if len(polygons) > 1 else polygons[0]
236241

237242
yoff, xoff, *_ = region_props.bbox
238-
return [shapely.affinity.translate(poly, xoff, yoff) for poly in polygons]
243+
return shapely.affinity.translate(polygon, xoff - 1, yoff - 1) # account for the padding
239244

240245

241246
def _vectorize_mask(
242247
mask: np.ndarray, # type: ignore[type-arg]
243248
) -> GeoDataFrame:
244249
if mask.max() == 0:
245-
return GeoDataFrame(geometry=[])
250+
return GeoDataFrame({"label": []}, geometry=[])
246251

247252
regions = skimage.measure.regionprops(mask)
248253

249-
polygons_list = [_region_props_to_polygons(region) for region in regions]
250-
geoms = [poly for polygons in polygons_list for poly in polygons]
251-
labels = [region.label for i, region in enumerate(regions) for _ in range(len(polygons_list[i]))]
252-
253-
return GeoDataFrame({"label": labels}, geometry=geoms)
254+
return GeoDataFrame(
255+
{"label": [region.label for region in regions]},
256+
geometry=[_region_props_to_polygon(region) for region in regions],
257+
)
254258

255259

256260
def _dissolve_on_overlaps(label: int, group: GeoDataFrame) -> GeoDataFrame:

tests/core/operations/test_vectorize.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,15 @@
33
import numpy as np
44
import pytest
55
from geopandas import GeoDataFrame
6-
from shapely import MultiPoint, Point
7-
8-
from spatialdata._core.operations.vectorize import to_circles, to_polygons
6+
from shapely import MultiPoint, Point, Polygon
7+
from shapely.ops import unary_union
8+
from skimage.draw import polygon
9+
10+
from spatialdata._core.operations.vectorize import (
11+
_vectorize_mask,
12+
to_circles,
13+
to_polygons,
14+
)
915
from spatialdata.datasets import blobs
1016
from spatialdata.models.models import ShapesModel
1117
from spatialdata.testing import assert_elements_are_identical
@@ -74,7 +80,10 @@ def test_polygons_to_circles() -> None:
7480
new_circles = to_circles(element)
7581

7682
data = {
77-
"geometry": [Point(315.8120722406787, 220.18894606643332), Point(270.1386975678398, 417.8747936281634)],
83+
"geometry": [
84+
Point(315.8120722406787, 220.18894606643332),
85+
Point(270.1386975678398, 417.8747936281634),
86+
],
7887
"radius": [16.608781, 17.541365],
7988
}
8089
expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry"))
@@ -87,7 +96,10 @@ def test_multipolygons_to_circles() -> None:
8796
new_circles = to_circles(element)
8897

8998
data = {
90-
"geometry": [Point(340.37951022629096, 250.76310705786318), Point(337.1680699150594, 316.39984581697314)],
99+
"geometry": [
100+
Point(340.37951022629096, 250.76310705786318),
101+
Point(337.1680699150594, 316.39984581697314),
102+
],
91103
"radius": [23.488363, 19.059285],
92104
}
93105
expected = ShapesModel.parse(GeoDataFrame(data, geometry="geometry"))
@@ -139,3 +151,23 @@ def test_invalid_geodataframe_to_polygons() -> None:
139151
gdf = GeoDataFrame(geometry=[MultiPoint([[0, 0], [1, 1]])])
140152
with pytest.raises(RuntimeError, match="Unsupported geometry type"):
141153
to_polygons(gdf)
154+
155+
156+
def test_vectorize_mask_almost_invertible() -> None:
157+
cell = Polygon([[10, 10], [30, 40], [90, 50], [100, 20]])
158+
image_shape = (70, 120)
159+
160+
rasterized_image = np.zeros(image_shape, dtype=np.int8)
161+
x, y = cell.exterior.coords.xy
162+
rr, cc = polygon(y, x, image_shape)
163+
rasterized_image[rr, cc] = 1
164+
165+
new_cell = _vectorize_mask(rasterized_image)
166+
new_cell = unary_union(new_cell.geometry)
167+
168+
assert new_cell.intersection(cell).area / new_cell.union(cell).area > 0.97
169+
170+
171+
def test_label_column_vectorize_mask() -> None:
172+
assert "label" in _vectorize_mask(np.array([0]))
173+
assert "label" in _vectorize_mask(np.array([[0, 1], [1, 1]]))

0 commit comments

Comments
 (0)