Skip to content

Commit cdfeac2

Browse files
optimize concatenation of centroids
1 parent bc37227 commit cdfeac2

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

climada/hazard/centroids/centr.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,7 +332,12 @@ def from_pnt_bounds(cls, points_bounds, res, crs=DEF_CRS):
332332
)
333333

334334
def append(self, centr):
335-
"""Append Centroids
335+
"""Append Centroids to the current object for batch concatenation.
336+
337+
This method adds the `centr.gdf` to the list of DataFrames to be concatenated
338+
later with `finalize_append()`. Instead of concatenating immediately, it accumulates
339+
the centroids in `_batch_gdf` to perform the concatenation all at once, which is more
340+
efficient for multiple appends.
336341
337342
Note that the result might contain duplicate points if the object to append has an overlap
338343
with the current object.
@@ -351,12 +356,31 @@ def append(self, centr):
351356
union : Union of Centroid objects.
352357
remove_duplicate_points : Remove duplicate points in a Centroids object.
353358
"""
359+
360+
if not hasattr(self, "_batch_gdf"):
361+
self._batch_gdf = [] # Initialize the batch
362+
354363
if not u_coord.equal_crs(self.crs, centr.crs):
355364
raise ValueError(
356365
f"The given centroids use different CRS: {self.crs}, {centr.crs}. "
357366
"The centroids are incompatible and cannot be concatenated."
358367
)
359-
self.gdf = pd.concat([self.gdf, centr.gdf])
368+
self._batch_gdf.append(centr.gdf)
369+
370+
def finalize_append(self):
    """Concatenate all batch-appended centroids into the main GeoDataFrame (gdf).

    This method should be called after all `append` operations have been performed on the
    Centroids object. It concatenates all the accumulated GeoDataFrames stored in the
    `_batch_gdf` list into the `gdf` attribute of the Centroids object. By doing this in one
    step, it avoids the performance overhead associated with repeated concatenations.

    Once concatenation is complete, the `_batch_gdf` list is cleared to prepare for future
    append operations.

    It is safe to call this method even if `append` was never called on this object: a
    missing `_batch_gdf` attribute is treated as an empty batch.

    Note that the concatenation is done with ``ignore_index=True``, so the index of the
    resulting `gdf` is renumbered from zero.
    """
    # `_batch_gdf` is only created lazily by `append`, so it may not exist yet
    # (e.g. when `union` is called without any other centroids). Fall back to an
    # empty batch instead of raising AttributeError.
    pending = getattr(self, "_batch_gdf", None) or []
    self.gdf = pd.concat([self.gdf, *pending], ignore_index=True)
    self._batch_gdf = []  # clear the batch after concatenation
360384

361385
def union(self, *others):
362386
"""Create the union of Centroids objects
@@ -377,6 +401,7 @@ def union(self, *others):
377401
centroids = copy.deepcopy(self)
378402
for cent in others:
379403
centroids.append(cent)
404+
centroids.finalize_append()
380405
return centroids.remove_duplicate_points()
381406

382407
def remove_duplicate_points(self):

0 commit comments

Comments
 (0)