diff --git a/docs/releases/development.rst b/docs/releases/development.rst index 7a855e78..0513124e 100644 --- a/docs/releases/development.rst +++ b/docs/releases/development.rst @@ -2,6 +2,9 @@ Next release (in development) ============================= +* Reduce memory allocations when constructing polygons. + This should allow opening even larger datasets + (:pr:`200`). * Add support for Python 3.14 and drop support for Python 3.11, following `SPEC-0 `_. (:pr:`201`). diff --git a/pyproject.toml b/pyproject.toml index 5264ff57..b47799eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ filterwarnings = [ markers = [ "matplotlib: Tests that involve matplotlib and plotting", "tutorial: Tests that involve the tutorial datasets", + "memory_usage: Regression tests for memory allocations", ] mpl-use-full-test-name = true diff --git a/src/emsarray/conventions/arakawa_c.py b/src/emsarray/conventions/arakawa_c.py index 17e77425..3e62a7a3 100644 --- a/src/emsarray/conventions/arakawa_c.py +++ b/src/emsarray/conventions/arakawa_c.py @@ -261,18 +261,31 @@ def pack_index(self, grid_kind: ArakawaCGridKind, indexes: Sequence[int]) -> Ara return cast(ArakawaCIndex, (grid_kind, *indexes)) def _make_polygons(self) -> numpy.ndarray: - # Make an array of shape (j, i, 2) of all the nodes - grid = numpy.stack([self.node.longitude.values, self.node.latitude.values], axis=-1) - - # Transform this in to an array of shape (topology.size, 4, 2) - points = numpy.stack([ - grid[:-1, :-1], - grid[:-1, +1:], - grid[+1:, +1:], - grid[+1:, :-1], - ], axis=2).reshape((-1, 4, 2)) - - return utils.make_polygons_with_holes(points) + j_size, i_size = self.face.shape + longitude = self.node.longitude.values + latitude = self.node.latitude.values + + # Preallocate the points array. We will copy data straight in to this + # to save repeated memory allocations. + points = numpy.empty(shape=(i_size, 4, 2), dtype=self.node.longitude.dtype) + # Preallocate the output array so we can fill it in batches + out = numpy.full(shape=self.face.size, fill_value=None, dtype=object) + # Construct polygons row by row + for j in range(self.face.shape[0]): + points[:, 0, 0] = longitude[j + 0, :-1] + points[:, 1, 0] = longitude[j + 0, +1:] + points[:, 2, 0] = longitude[j + 1, +1:] + points[:, 3, 0] = longitude[j + 1, :-1] + + points[:, 0, 1] = latitude[j + 0, :-1] + points[:, 1, 1] = latitude[j + 0, +1:] + points[:, 2, 1] = latitude[j + 1, +1:] + points[:, 3, 1] = latitude[j + 1, :-1] + + j_slice = slice(j * i_size, (j + 1) * i_size) + utils.make_polygons_with_holes(points, out=out[j_slice]) + + return out @cached_property def face_centres(self) -> numpy.ndarray: diff --git a/src/emsarray/conventions/grid.py b/src/emsarray/conventions/grid.py index a5a941c1..2997a2d8 100644 --- a/src/emsarray/conventions/grid.py +++ b/src/emsarray/conventions/grid.py @@ -413,39 +413,32 @@ def _make_polygons(self) -> numpy.ndarray: lon_bounds = self.topology.longitude_bounds.values lat_bounds = self.topology.latitude_bounds.values - # Make a bounds array as if this dataset had 2D coordinates. - # 1D bounds are (lon, 2) and (lat, 2). - # 2D bounds are (lat, lon, 4) - # where the 4 points are (j-i, i-1), (j-1, i+1), (j+1, i+1), (j+1, i-1). - # The bounds values are repeated as required, are given a new dimension, - # then repeated along that new dimension. - # They will come out as array with shape (y_size, x_size, 4) - - lon_bounds_2d = numpy.stack([ - lon_bounds[:, 0], - lon_bounds[:, 1], - lon_bounds[:, 1], - lon_bounds[:, 0], - ], axis=-1) - lon_bounds_2d = numpy.broadcast_to(numpy.expand_dims(lon_bounds_2d, 0), (y_size, x_size, 4)) - - lat_bounds_2d = numpy.stack([ - lat_bounds[:, 0], - lat_bounds[:, 0], - lat_bounds[:, 1], - lat_bounds[:, 1], - ], axis=-1) - lat_bounds_2d = numpy.broadcast_to(numpy.expand_dims(lat_bounds_2d, 0), (x_size, y_size, 4)) - lat_bounds_2d = numpy.transpose(lat_bounds_2d, (1, 0, 2)) - - assert lon_bounds_2d.shape == lat_bounds_2d.shape == (y_size, x_size, 4) - - # points is a (topology.size, 4, 2) array of the corners of each cell - points = numpy.stack([lon_bounds_2d, lat_bounds_2d], axis=-1).reshape((-1, 4, 2)) - - polygons = utils.make_polygons_with_holes(points) - - return polygons + # Create the polygons batched by row. + # The point array is copied by shapely before being used, + # so this can accidentally use a whole bunch of memory for large datasets. + # Creating them one by one is very slow but very memory efficient. + # Creating the polygons in one batch is faster but uses up a huge amount of memory. + # Batching them row by row is a decent compromise. + out = numpy.full(shape=y_size * x_size, dtype=object, fill_value=None) + + # By preallocating this array, we can copy data in to it to save on a number of allocations. + chunk_points = numpy.empty(shape=(x_size, 4, 2), dtype=lon_bounds.dtype) + # By chunking by row, the longitude bounds never change between loops + chunk_points[:, 0, 0] = lon_bounds[:, 0] + chunk_points[:, 1, 0] = lon_bounds[:, 1] + chunk_points[:, 2, 0] = lon_bounds[:, 1] + chunk_points[:, 3, 0] = lon_bounds[:, 0] + + for row in range(y_size): + chunk_points[:, 0, 1] = lat_bounds[row, 0] + chunk_points[:, 1, 1] = lat_bounds[row, 0] + chunk_points[:, 2, 1] = lat_bounds[row, 1] + chunk_points[:, 3, 1] = lat_bounds[row, 1] + + row_slice = slice(row * x_size, (row + 1) * x_size) + utils.make_polygons_with_holes(chunk_points, out=out[row_slice]) + + return out @cached_property def face_centres(self) -> numpy.ndarray: @@ -597,13 +590,22 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None: def _make_polygons(self) -> numpy.ndarray: # Construct polygons from the bounds of the cells - lon_bounds = self.topology.longitude_bounds.values - lat_bounds = self.topology.latitude_bounds.values - - # points is a (topology.size, 4, 2) array of the corners of each cell - points = numpy.stack([lon_bounds, lat_bounds], axis=-1).reshape((-1, 4, 2)) - - return utils.make_polygons_with_holes(points) + j_size, i_size = self.topology.shape + lon_bounds = self.topology.longitude_bounds + lat_bounds = self.topology.latitude_bounds + + assert lon_bounds.shape == (j_size, i_size, 4) + assert lat_bounds.shape == (j_size, i_size, 4) + + chunk_points = numpy.empty(shape=(i_size, 4, 2), dtype=lon_bounds.dtype) + out = numpy.full(shape=j_size * i_size, dtype=object, fill_value=None) + for j in range(j_size): + chunk_points[:, :, 0] = lon_bounds[j, :, :] + chunk_points[:, :, 1] = lat_bounds[j, :, :] + chunk_slice = slice(j * i_size, (j + 1) * i_size) + utils.make_polygons_with_holes(chunk_points, out=out[chunk_slice]) + + return out @cached_property def face_centres(self) -> numpy.ndarray: diff --git a/src/emsarray/conventions/ugrid.py b/src/emsarray/conventions/ugrid.py index e3675a09..d7ffa445 100644 --- a/src/emsarray/conventions/ugrid.py +++ b/src/emsarray/conventions/ugrid.py @@ -7,6 +7,7 @@ """ import enum import logging +import math import pathlib import warnings from collections import defaultdict @@ -1115,10 +1116,15 @@ def _make_polygons(self) -> numpy.ndarray: for unique_size in unique_sizes: # Extract the face node data for every polygon of this size indices = numpy.flatnonzero(polygon_sizes == unique_size) - nodes = numpy.ma.getdata(face_node)[indices, :unique_size] - coords = numpy.stack([node_x[nodes], node_y[nodes]], axis=-1) - # Generate the polygons directly in to their correct locations - shapely.polygons(coords, indices=indices, out=polygons) + chunk_size = 1000 + chunk_count = math.ceil(len(indices) / chunk_size) + for chunk_index in range(chunk_count): + chunk_slice = slice(chunk_index * chunk_size, (chunk_index + 1) * chunk_size) + chunk_indices = indices[chunk_slice] + nodes = numpy.ma.getdata(face_node)[chunk_indices, :unique_size] + coords = numpy.stack([node_x[nodes], node_y[nodes]], axis=-1) + # Generate the polygons directly in to their correct locations + shapely.polygons(coords, indices=chunk_indices, out=polygons) return polygons diff --git a/tests/conventions/test_cfgrid1d.py b/tests/conventions/test_cfgrid1d.py index fd530835..c3702b35 100644 --- a/tests/conventions/test_cfgrid1d.py +++ b/tests/conventions/test_cfgrid1d.py @@ -1,4 +1,5 @@ import json +import logging import pathlib import numpy @@ -15,7 +16,11 @@ CFGrid1D, CFGrid1DTopology, CFGridKind, CFGridTopology ) from emsarray.operations import geometry -from tests.utils import assert_property_not_cached, box, mask_from_strings +from tests.utils import ( + assert_property_not_cached, box, mask_from_strings, track_peak_memory_usage +) + +logger = logging.getLogger(__name__) def make_dataset( @@ -500,3 +505,17 @@ def test_topology(): assert_allclose( latitude_bounds.values, 0.1 * numpy.array([[i - 0.5, i + 0.5] for i in range(11)])) + + +def test_make_polygon_memory_usage() -> None: + width, height = 2000, 1000 + dataset = make_dataset(width=width, height=height) + + with track_peak_memory_usage() as tracker: + assert len(dataset.ems.polygons) == width * height + + logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak) + + target = 135_000_000 + assert tracker.peak < target, "Peak memory allocation is too large" + assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?" diff --git a/tests/conventions/test_cfgrid2d.py b/tests/conventions/test_cfgrid2d.py index fc055079..6ad431ed 100644 --- a/tests/conventions/test_cfgrid2d.py +++ b/tests/conventions/test_cfgrid2d.py @@ -7,6 +7,7 @@ """ import itertools import json +import logging import pathlib import numpy @@ -25,9 +26,12 @@ from emsarray.operations import geometry from tests.utils import ( AxisAlignedShocGrid, DiagonalShocGrid, ShocGridGenerator, - ShocLayerGenerator, assert_property_not_cached, plot_geometry + ShocLayerGenerator, assert_property_not_cached, plot_geometry, + track_peak_memory_usage ) +logger = logging.getLogger(__name__) + def make_dataset( *, @@ -497,3 +501,17 @@ def test_plot_on_figure() -> None: dataset.ems.plot_on_figure(figure, surface_temp) assert len(figure.axes) == 2 + + +def test_make_polygon_memory_usage() -> None: + j_size, i_size = 1000, 2000 + dataset = make_dataset(j_size=j_size, i_size=i_size) + + with track_peak_memory_usage() as tracker: + assert len(dataset.ems.polygons) == j_size * i_size + + logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak) + + target = 300_000_000 + assert tracker.peak < target, "Peak memory allocation is too large" + assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?" diff --git a/tests/conventions/test_shoc_standard.py b/tests/conventions/test_shoc_standard.py index cf944fda..57882270 100644 --- a/tests/conventions/test_shoc_standard.py +++ b/tests/conventions/test_shoc_standard.py @@ -1,5 +1,6 @@ import itertools import json +import logging import pathlib import numpy @@ -17,9 +18,12 @@ from emsarray.conventions.shoc import ShocStandard from emsarray.operations import geometry from tests.utils import ( - DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, mask_from_strings + DiagonalShocGrid, ShocGridGenerator, ShocLayerGenerator, mask_from_strings, + track_peak_memory_usage ) +logger = logging.getLogger(__name__) + def make_dataset( *, @@ -657,3 +661,18 @@ def clip_values(values: numpy.ndarray) -> numpy.ndarray: assert clipped.ems.polygons[6] is None assert clipped.ems.polygons[7].equals_exact(original_polygons[7], 1e-6) assert clipped.ems.polygons[8] is None + + +@pytest.mark.memory_usage +def test_make_polygons_memory_usage(): + j_size, i_size = 1000, 2000 + dataset = make_dataset(j_size=j_size, i_size=i_size) + + with track_peak_memory_usage() as tracker: + assert len(dataset.ems.polygons) == j_size * i_size + + logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak) + + target = 134_500_000 + assert tracker.peak < target, "Peak memory allocation is too large" + assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?" diff --git a/tests/conventions/test_ugrid.py b/tests/conventions/test_ugrid.py index 4924032f..db38cce0 100644 --- a/tests/conventions/test_ugrid.py +++ b/tests/conventions/test_ugrid.py @@ -1,4 +1,5 @@ import json +import logging import pathlib import warnings @@ -20,7 +21,11 @@ ConventionViolationError, ConventionViolationWarning ) from emsarray.operations import geometry -from tests.utils import assert_property_not_cached, filter_warning +from tests.utils import ( + assert_property_not_cached, filter_warning, track_peak_memory_usage +) + +logger = logging.getLogger(__name__) def make_faces(width: int, height, fill_value: int) -> tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray]: @@ -981,3 +986,17 @@ def test_has_valid_face_edge_connectivity(): dataset_fill_value_above_range['Mesh2_face_edges'].encoding['_FillValue'] = 89 assert dataset_fill_value_above_range.ems.topology.has_valid_face_edge_connectivity is True + + +@pytest.mark.memory_usage +def test_make_polygons_memory_usage(): + dataset = make_dataset(width=600, height=600) + + with track_peak_memory_usage() as tracker: + assert len(dataset.ems.polygons) == dataset.ems.topology.face_count + + logger.info("current memory usage: %d, peak memory usage: %d", tracker.current, tracker.peak) + + target = 78_000_000 + assert tracker.peak < target, "Peak memory allocation is too large" + assert tracker.peak > target * 0.9, "Peak memory allocation is suspiciously small - did you improve things?" diff --git a/tests/utils.py b/tests/utils.py index bfc0ffa7..2e8c8742 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -2,10 +2,12 @@ import contextlib import importlib.metadata import itertools +import tracemalloc import warnings from collections.abc import Hashable from functools import cached_property -from typing import Any +from types import TracebackType +from typing import Any, Type import matplotlib.pyplot as plt import numpy @@ -462,3 +464,41 @@ def plot_geometry( axes.set_extent(extent) figure.savefig(out) + + +class TracemallocTracker: + _finished = False + _usage = None + + def __enter__(self): + tracemalloc.start() + return self + + @property + def current(self): + if not self._finished: + raise RuntimeError("Context manager has not exited yet") + return self._usage[0] + + @property + def peak(self): + if not self._finished: + raise RuntimeError("Context manager has not exited yet") + return self._usage[1] + + def __exit__( + self, + exc_type: Type[Exception] | None, + exc_value: Exception | None, + exc_traceback: TracebackType | None, + ) -> bool | None: + self._finished = True + self._usage = tracemalloc.get_traced_memory() + + tracemalloc.stop() + + return None + + +def track_peak_memory_usage(): + return TracemallocTracker()