Skip to content

Commit 7bf7767

Browse files
committed
Try an optimization and add a print statement for profiling
1 parent a23fb84 commit 7bf7767

File tree

2 files changed

+20
-2
lines changed

2 files changed

+20
-2
lines changed

src/mdio/converters/segy.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,7 +472,14 @@ def segy_to_mdio( # noqa: PLR0913, PLR0915
472472
local_coords: list[np.ndarray] = []
473473
for dim_idx, sl in enumerate(chunk_indices):
474474
hdr_arr = grid.header_index_arrays[dim_idx]
475-
local_idx = (hdr_arr[trace_ids] - sl.start).astype(int)
475+
# Optimize memory usage: hdr_arr and trace_ids are already uint32,
476+
# sl.start is int, so result should naturally be int32/uint32.
477+
# NOTE: the cast below still runs for any non-intp dtype, and np.intp is int64 on 64-bit platforms.
478+
indexed_coords = hdr_arr[trace_ids] # uint32 array
479+
local_idx = indexed_coords - sl.start # remains uint32
480+
# Normalize to np.intp (NumPy's native index dtype) unless the array already has that dtype
481+
if local_idx.dtype != np.intp:
482+
local_idx = local_idx.astype(np.intp)
476483
local_coords.append(local_idx)
477484

478485
# Mark live cells in the temporary block
@@ -487,6 +494,10 @@ def segy_to_mdio( # noqa: PLR0913, PLR0915
487494
write_attribute(name="text_header", zarr_group=meta_group, attribute=text_header.split("\n"))
488495
write_attribute(name="binary_header", zarr_group=meta_group, attribute=binary_header.to_dict())
489496

497+
from datetime import datetime
498+
499+
print("The livemask was written at time:", datetime.now().strftime("%H:%M:%S"))
500+
490501
# Write traces
491502
stats = blocked_io.to_zarr(
492503
segy_file=segy,

src/mdio/segy/_workers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,14 @@ def trace_worker(
9696
local_coords: list[np.ndarray] = []
9797
for dim_idx, sl in enumerate(chunk_indices[:-1]):
9898
hdr_arr = grid.header_index_arrays[dim_idx]
99-
local_idx = (hdr_arr[trace_ids] - sl.start).astype(int)
99+
# Optimize memory usage: hdr_arr and trace_ids are already uint32,
100+
# sl.start is int, so result should naturally be int32/uint32.
101+
# NOTE: the cast below still runs for any non-intp dtype, and np.intp is int64 on 64-bit platforms.
102+
indexed_coords = hdr_arr[trace_ids] # uint32 array
103+
local_idx = indexed_coords - sl.start # remains uint32
104+
# Normalize to np.intp (NumPy's native index dtype) unless the array already has that dtype
105+
if local_idx.dtype != np.intp:
106+
local_idx = local_idx.astype(np.intp)
100107
local_coords.append(local_idx)
101108
full_idx = tuple(local_coords) + (slice(None),)
102109

0 commit comments

Comments
 (0)