@@ -413,39 +413,32 @@ def _make_polygons(self) -> numpy.ndarray:
413413 lon_bounds = self .topology .longitude_bounds .values
414414 lat_bounds = self .topology .latitude_bounds .values
415415
416- # Make a bounds array as if this dataset had 2D coordinates.
417- # 1D bounds are (lon, 2) and (lat, 2).
418- # 2D bounds are (lat, lon, 4)
419- # where the 4 points are (j-i, i-1), (j-1, i+1), (j+1, i+1), (j+1, i-1).
420- # The bounds values are repeated as required, are given a new dimension,
421- # then repeated along that new dimension.
422- # They will come out as array with shape (y_size, x_size, 4)
423-
424- lon_bounds_2d = numpy .stack ([
425- lon_bounds [:, 0 ],
426- lon_bounds [:, 1 ],
427- lon_bounds [:, 1 ],
428- lon_bounds [:, 0 ],
429- ], axis = - 1 )
430- lon_bounds_2d = numpy .broadcast_to (numpy .expand_dims (lon_bounds_2d , 0 ), (y_size , x_size , 4 ))
431-
432- lat_bounds_2d = numpy .stack ([
433- lat_bounds [:, 0 ],
434- lat_bounds [:, 0 ],
435- lat_bounds [:, 1 ],
436- lat_bounds [:, 1 ],
437- ], axis = - 1 )
438- lat_bounds_2d = numpy .broadcast_to (numpy .expand_dims (lat_bounds_2d , 0 ), (x_size , y_size , 4 ))
439- lat_bounds_2d = numpy .transpose (lat_bounds_2d , (1 , 0 , 2 ))
440-
441- assert lon_bounds_2d .shape == lat_bounds_2d .shape == (y_size , x_size , 4 )
442-
443- # points is a (topology.size, 4, 2) array of the corners of each cell
444- points = numpy .stack ([lon_bounds_2d , lat_bounds_2d ], axis = - 1 ).reshape ((- 1 , 4 , 2 ))
445-
446- polygons = utils .make_polygons_with_holes (points )
447-
448- return polygons
416+ # Create the polygons batched by row.
417+ # The point array is copied by shapely before being used,
418+ # so this can accidentally use a whole bunch of memory for large datasets.
419+ # Creating them one by one is very slow but very memory efficient.
420+ # Creating the polygons in one batch is faster but uses up a huge amount of memory.
421+ # Batching them row by row is a decent compromise.
422+ out = numpy .full (shape = y_size * x_size , dtype = object , fill_value = None )
423+
424+ # By preallocating this array, we can copy data in to it to save on a number of allocations.
425+ chunk_points = numpy .empty (shape = (x_size , 4 , 2 ), dtype = lon_bounds .dtype )
426+ # By chunking by row, the longitude bounds never change between loops
427+ chunk_points [:, 0 , 0 ] = lon_bounds [:, 0 ]
428+ chunk_points [:, 1 , 0 ] = lon_bounds [:, 1 ]
429+ chunk_points [:, 2 , 0 ] = lon_bounds [:, 1 ]
430+ chunk_points [:, 3 , 0 ] = lon_bounds [:, 0 ]
431+
432+ for row in range (y_size ):
433+ chunk_points [:, 0 , 1 ] = lat_bounds [row , 0 ]
434+ chunk_points [:, 1 , 1 ] = lat_bounds [row , 0 ]
435+ chunk_points [:, 2 , 1 ] = lat_bounds [row , 1 ]
436+ chunk_points [:, 3 , 1 ] = lat_bounds [row , 1 ]
437+
438+ row_slice = slice (row * x_size , (row + 1 ) * x_size )
439+ utils .make_polygons_with_holes (chunk_points , out = out [row_slice ])
440+
441+ return out
449442
450443 @cached_property
451444 def face_centres (self ) -> numpy .ndarray :
@@ -597,13 +590,22 @@ def check_dataset(cls, dataset: xarray.Dataset) -> int | None:
597590
598591 def _make_polygons (self ) -> numpy .ndarray :
599592 # Construct polygons from the bounds of the cells
600- lon_bounds = self .topology .longitude_bounds .values
601- lat_bounds = self .topology .latitude_bounds .values
602-
603- # points is a (topology.size, 4, 2) array of the corners of each cell
604- points = numpy .stack ([lon_bounds , lat_bounds ], axis = - 1 ).reshape ((- 1 , 4 , 2 ))
605-
606- return utils .make_polygons_with_holes (points )
593+ j_size , i_size = self .topology .shape
594+ lon_bounds = self .topology .longitude_bounds
595+ lat_bounds = self .topology .latitude_bounds
596+
597+ assert lon_bounds .shape == (j_size , i_size , 4 )
598+ assert lat_bounds .shape == (j_size , i_size , 4 )
599+
600+ chunk_points = numpy .empty (shape = (i_size , 4 , 2 ), dtype = lon_bounds .dtype )
601+ out = numpy .full (shape = j_size * i_size , dtype = object , fill_value = None )
602+ for j in range (j_size ):
603+ chunk_points [:, :, 0 ] = lon_bounds [j , :, :]
604+ chunk_points [:, :, 1 ] = lat_bounds [j , :, :]
605+ chunk_slice = slice (j * i_size , (j + 1 ) * i_size )
606+ utils .make_polygons_with_holes (chunk_points , out = out [chunk_slice ])
607+
608+ return out
607609
608610 @cached_property
609611 def face_centres (self ) -> numpy .ndarray :
0 commit comments