Skip to content

Commit e5e48fe

Browse files
committed
Improve peak memory usage in Arakawa C grid conventions
1 parent fe4443a commit e5e48fe

File tree

2 files changed

+30
-16
lines changed

2 files changed

+30
-16
lines changed

src/emsarray/conventions/arakawa_c.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -261,18 +261,31 @@ def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> Ara
261261
return cast(ArakawaCIndex, (grid_kind, *indexes))
262262

263263
def _make_polygons(self) -> numpy.ndarray:
264-
# Make an array of shape (j, i, 2) of all the nodes
265-
grid = numpy.stack([self.node.longitude.values, self.node.latitude.values], axis=-1)
266-
267-
# Transform this in to an array of shape (topology.size, 4, 2)
268-
points = numpy.stack([
269-
grid[:-1, :-1],
270-
grid[:-1, +1:],
271-
grid[+1:, +1:],
272-
grid[+1:, :-1],
273-
], axis=2).reshape((-1, 4, 2))
274-
275-
return utils.make_polygons_with_holes(points)
264+
j_size, i_size = self.face.shape
265+
longitude = self.node.longitude.values
266+
latitude = self.node.latitude.values
267+
268+
# Preallocate the points array. We will copy data straight in to this
269+
# to save repeated memory allocations.
270+
points = numpy.empty(shape=(i_size, 4, 2), dtype=self.node.longitude.dtype)
271+
# Preallocate the output array so we can fill it in batches
272+
out = numpy.full(shape=self.face.size, fill_value=None, dtype=object)
273+
# Construct polygons row by row
274+
for j in range(self.face.shape[0]):
275+
points[:, 0, 0] = longitude[j + 0, :-1]
276+
points[:, 1, 0] = longitude[j + 0, +1:]
277+
points[:, 2, 0] = longitude[j + 1, +1:]
278+
points[:, 3, 0] = longitude[j + 1, :-1]
279+
280+
points[:, 0, 1] = latitude[j + 0, :-1]
281+
points[:, 1, 1] = latitude[j + 0, +1:]
282+
points[:, 2, 1] = latitude[j + 1, +1:]
283+
points[:, 3, 1] = latitude[j + 1, :-1]
284+
285+
j_slice = slice(j * i_size, (j + 1) * i_size)
286+
utils.make_polygons_with_holes(points, out=out[j_slice])
287+
288+
return out
276289

277290
@cached_property
278291
def face_centres(self) -> numpy.ndarray:

tests/conventions/test_shoc_standard.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import itertools
22
import json
3-
import pathlib
43
import logging
4+
import pathlib
55

66
import numpy
77
import pandas
@@ -18,7 +18,8 @@
1818
from emsarray.conventions.shoc import ShocStandard
1919
from emsarray.operations import geometry
2020
from tests.utils import (
21-
DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, mask_from_strings, track_peak_memory_usage
21+
DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, mask_from_strings,
22+
track_peak_memory_usage
2223
)
2324

2425
logger = logging.getLogger(__name__)
@@ -670,8 +671,8 @@ def test_make_polygons_memory_usage():
670671
with track_peak_memory_usage() as tracker:
671672
assert len(dataset.ems.polygons) == j_size * i_size
672673

673-
logger.info(f"current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak)
674+
logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak)
674675

675-
target = 570_000_000
676+
target = 133_000_000
676677
assert tracker.peak < target, "Peak memory allocation is too large"
677678
assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?"

0 commit comments

Comments
 (0)