Skip to content

Commit db09188

Browse files
committed
Add minimal support for live mask configuration
1 parent db741ea commit db09188

File tree

2 files changed

+54
-2
lines changed

2 files changed

+54
-2
lines changed

src/mdio/converters/segy.py

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def segy_to_mdio( # noqa: C901
116116
storage_options_output: dict[str, Any] | None = None,
117117
overwrite: bool = False,
118118
grid_overrides: dict | None = None,
119+
live_mask_chunksize: Sequence[int] | None = None,
119120
) -> None:
120121
"""Convert SEG-Y file to MDIO format.
121122
@@ -170,11 +171,16 @@ def segy_to_mdio( # noqa: C901
170171
Default is `None` (will assume anonymous)
171172
overwrite: Toggle for overwriting existing store
172173
grid_overrides: Option to add grid overrides. See examples.
174+
live_mask_chunksize: Chunk size for live mask. This has limited
175+
support across the MDIO api.
176+
Default is `None` (will do no chunking)
173177
174178
Raises:
175179
GridTraceCountError: Raised if grid won't hold all traces in the
176180
SEG-Y file.
177-
ValueError: If length of chunk sizes don't match number of dimensions.
181+
ValueError: If length of chunk sizes doesn't match number of dimensions
182+
or live_mask_chunksize is not None and length of live_mask_chunksize
183+
is not equal to number of dimensions minus one.
178184
NotImplementedError: If can't determine chunking automatically for 4D+.
179185
180186
Examples:
@@ -342,6 +348,20 @@ def segy_to_mdio( # noqa: C901
342348
... chunksize=(8, 2, 256, 512),
343349
... grid_overrides={"HasDuplicates": True},
344350
... )
351+
352+
>>> segy_to_mdio(
353+
... segy_path="prefix/shot_file.segy",
354+
... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
355+
... index_bytes=(133, 171, 17, 137, 13),
356+
... index_lengths=(2, 2, 4, 2, 4),
357+
... index_names=("shot_line", "gun", "shot_point", "cable", "channel"),
358+
... chunksize=(1, 1, 8, 1, 128, 1024),
359+
... grid_overrides={
360+
... "AutoShotWrap": True,
361+
... "AutoChannelWrap": True,
362+
... "AutoChannelTraceQC": 1000000
363+
... },
364+
... live_mask_chunksize=(1, 1, 8, 1, 128),
... )
345365
"""
346366
if index_names is None:
347367
index_names = [f"dim_{i}" for i in range(len(index_bytes))]
@@ -356,6 +376,14 @@ def segy_to_mdio( # noqa: C901
356376
f"equal to array dimensions={len(index_bytes) + 1}",
357377
)
358378
raise ValueError(message)
379+
380+
if live_mask_chunksize is not None:
381+
if len(live_mask_chunksize) != len(index_bytes):
382+
message = (
383+
f"Length of live_mask_chunksize={len(live_mask_chunksize)} must be ",
384+
f"equal to array dimensions={len(index_bytes)}",
385+
)
386+
raise ValueError(message)
359387

360388
# Handle storage options and check permissions etc
361389
if storage_options_input is None:
@@ -424,12 +452,15 @@ def segy_to_mdio( # noqa: C901
424452
trace_count = np.count_nonzero(grid.live_mask)
425453
write_attribute(name="trace_count", zarr_group=zarr_root, attribute=trace_count)
426454

455+
if live_mask_chunksize is None:
456+
live_mask_chunksize = -1
457+
427458
# Note, live mask is not chunked unless live_mask_chunksize is given, since it's bool and small.
428459
zarr_root["metadata"].create_dataset(
429460
data=grid.live_mask,
430461
name="live_mask",
431462
shape=grid.shape[:-1],
432-
chunks=-1,
463+
chunks=live_mask_chunksize,
433464
dimension_separator="/",
434465
)
435466

tests/integration/test_segy_import_export.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,27 @@ def test_3d_import(segy_input, zarr_tmp, index_bytes, index_names):
266266
)
267267

268268

269+
@pytest.mark.dependency
def test_live_mask_chunksize(segy_input, zarr_tmp):
    """Test that segy_to_mdio honors an explicit live_mask_chunksize.

    Ingests the SEG-Y fixture as a 3D dataset (inline, crossline, sample)
    with an explicit live mask chunk size, then reopens the output store
    and checks the chunking of the ``metadata/live_mask`` array.

    Args:
        segy_input: Fixture giving the path of the input SEG-Y file.
        zarr_tmp: Fixture giving the path of the output MDIO store.
    """
    # Kept as a function-local import, as in the original test.
    import zarr

    # Single source of truth for the value passed in and asserted on.
    expected_chunks = (8, 8)
    mdio_path = str(zarr_tmp)

    segy_to_mdio(
        segy_path=str(segy_input),
        mdio_path_or_buffer=mdio_path,
        index_bytes=(17, 13),
        index_names=("inline", "crossline"),
        chunksize=(512, 512, 512),  # For a 3D dataset: inline, crossline, sample
        overwrite=True,
        live_mask_chunksize=expected_chunks,
    )

    # Open the MDIO store read-only as a Zarr group.
    zarr_group = zarr.open_group(mdio_path, mode="r")
    live_mask_ds = zarr_group["metadata"]["live_mask"]

    assert live_mask_ds.chunks == expected_chunks
288+
289+
269290
@pytest.mark.dependency("test_3d_import")
270291
class TestReader:
271292
"""Test reader functionality."""

0 commit comments

Comments
 (0)