Skip to content

Commit a7ab60e

Browse files
committed
Memory management
1 parent 4e35a13 commit a7ab60e

File tree

1 file changed

+25
-12
lines changed

1 file changed

+25
-12
lines changed

src/mdio/converters/segy.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -456,17 +456,19 @@ def segy_to_mdio( # noqa: PLR0913, PLR0915
456456
# 'live_mask_array' has the same first N–1 dims as 'grid.shape[:-1]'
457457
# Build a ChunkIterator over the live_mask (no sample axis)
458458
from mdio.core.indexing import ChunkIterator
459+
import gc
459460

460461
chunker = ChunkIterator(live_mask_array, chunk_samples=True)
461462
for chunk_indices in chunker:
462463
# chunk_indices is a tuple of N–1 slice objects
463464
trace_ids = grid.get_traces_for_chunk(chunk_indices)
464465
if trace_ids.size == 0:
466+
# Free memory immediately for empty chunks
467+
del trace_ids
465468
continue
466469

467470
# Build a temporary boolean block of shape = chunk shape
468-
block_shape = tuple(sl.stop - sl.start for sl in chunk_indices)
469-
block = np.zeros(block_shape, dtype=bool)
471+
block = np.zeros(tuple(sl.stop - sl.start for sl in chunk_indices), dtype=bool)
470472

471473
# Compute local coords within this block for each trace_id
472474
local_coords: list[np.ndarray] = []
@@ -477,33 +479,44 @@ def segy_to_mdio( # noqa: PLR0913, PLR0915
477479
# Avoid unnecessary astype conversion to int64.
478480
indexed_coords = hdr_arr[trace_ids] # uint32 array
479481
local_idx = indexed_coords - sl.start # remains uint32
482+
# Free indexed_coords immediately
483+
del indexed_coords
484+
480485
# Only convert dtype if necessary for indexing (numpy requires int for indexing)
481486
if local_idx.dtype != np.intp:
482487
local_idx = local_idx.astype(np.intp)
483488
local_coords.append(local_idx)
489+
# local_idx is now owned by local_coords list, safe to continue
490+
491+
# Free trace_ids as soon as we're done with it
492+
del trace_ids
484493

485494
# Mark live cells in the temporary block
486495
block[tuple(local_coords)] = True
496+
497+
# Free local_coords immediately after use
498+
del local_coords
487499

488500
# Write the entire block to Zarr at once
489501
live_mask_array.set_basic_selection(selection=chunk_indices, value=block)
502+
503+
# Free block immediately after writing
504+
del block
505+
506+
# Force garbage collection after each chunk to free memory aggressively
507+
gc.collect()
508+
509+
# Final cleanup
510+
del live_mask_array
511+
del chunker
512+
gc.collect()
490513

491514
nonzero_count = grid.num_traces
492515

493516
write_attribute(name="trace_count", zarr_group=root_group, attribute=nonzero_count)
494517
write_attribute(name="text_header", zarr_group=meta_group, attribute=text_header.split("\n"))
495518
write_attribute(name="binary_header", zarr_group=meta_group, attribute=binary_header.to_dict())
496519

497-
# Clean up live mask related variables
498-
del live_mask_array
499-
del chunker
500-
del block
501-
del local_coords
502-
del trace_ids
503-
del chunk_indices
504-
import gc
505-
gc.collect()
506-
507520
# Write traces
508521
stats = blocked_io.to_zarr(
509522
segy_file=segy,

0 commit comments

Comments
 (0)