Skip to content

Commit e4d3e9a

Browse files
committed
Improve peak memory usage in unstructured grid conventions
1 parent e5e48fe commit e4d3e9a

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

src/emsarray/conventions/ugrid.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
"""
88
import enum
99
import logging
10+
import math
1011
import pathlib
1112
import warnings
1213
from collections import defaultdict
@@ -1115,10 +1116,15 @@ def _make_polygons(self) -> numpy.ndarray:
11151116
for unique_size in unique_sizes:
11161117
# Extract the face node data for every polygon of this size
11171118
indices = numpy.flatnonzero(polygon_sizes == unique_size)
1118-
nodes = numpy.ma.getdata(face_node)[indices, :unique_size]
1119-
coords = numpy.stack([node_x[nodes], node_y[nodes]], axis=-1)
1120-
# Generate the polygons directly in to their correct locations
1121-
shapely.polygons(coords, indices=indices, out=polygons)
1119+
chunk_size = 1000
1120+
chunk_count = math.ceil(len(indices) / chunk_size)
1121+
for chunk_index in range(chunk_count):
1122+
chunk_slice = slice(chunk_index * chunk_size, (chunk_index + 1) * chunk_size)
1123+
chunk_indices = indices[chunk_slice]
1124+
nodes = numpy.ma.getdata(face_node)[chunk_indices, :unique_size]
1125+
coords = numpy.stack([node_x[nodes], node_y[nodes]], axis=-1)
1126+
# Generate the polygons directly into their correct locations
1127+
shapely.polygons(coords, indices=chunk_indices, out=polygons)
11221128

11231129
return polygons
11241130

tests/conventions/test_ugrid.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import json
2+
import logging
23
import pathlib
34
import warnings
4-
import logging
55

66
import geojson
77
import numpy
@@ -21,7 +21,9 @@
2121
ConventionViolationError, ConventionViolationWarning
2222
)
2323
from emsarray.operations import geometry
24-
from tests.utils import assert_property_not_cached, filter_warning, track_peak_memory_usage
24+
from tests.utils import (
25+
assert_property_not_cached, filter_warning, track_peak_memory_usage
26+
)
2527

2628
logger = logging.getLogger(__name__)
2729

@@ -988,13 +990,13 @@ def test_has_valid_face_edge_connectivity():
988990

989991
@pytest.mark.memory_usage
990992
def test_make_polygons_memory_usage():
991-
dataset = make_dataset(width=500, height=400)
993+
dataset = make_dataset(width=600, height=600)
992994

993995
with track_peak_memory_usage() as tracker:
994996
assert len(dataset.ems.polygons) == dataset.ems.topology.face_count
995997

996-
logger.info(f"current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak)
998+
logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak)
997999

998-
target = 124_000_000
1000+
target = 78_000_000
9991001
assert tracker.peak < target, "Peak memory allocation is too large"
10001002
assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?"

0 commit comments

Comments
 (0)