@@ -116,6 +116,7 @@ def segy_to_mdio( # noqa: C901
116116 storage_options_output : dict [str , Any ] | None = None ,
117117 overwrite : bool = False ,
118118 grid_overrides : dict | None = None ,
119+ live_mask_chunksize : Sequence [int ] | None = None ,
119120) -> None :
120121 """Convert SEG-Y file to MDIO format.
121122
@@ -170,11 +171,16 @@ def segy_to_mdio( # noqa: C901
170171 Default is `None` (will assume anonymous)
171172 overwrite: Toggle for overwriting existing store
172173 grid_overrides: Option to add grid overrides. See examples.
174+ live_mask_chunksize: Chunk size for live mask. This has limited
175+ support across the MDIO api.
176+ Default is `None` (will do no chunking)
173177
174178 Raises:
175179 GridTraceCountError: Raised if grid won't hold all traces in the
176180 SEG-Y file.
177- ValueError: If length of chunk sizes don't match number of dimensions.
181+ ValueError: If length of chunk sizes don't match number of dimensions
182+ or live_mask_chunksize is not None and length of live_mask_chunksize
183+ is not equal to number of dimensions minus one.
178184 NotImplementedError: If can't determine chunking automatically for 4D+.
179185
180186 Examples:
@@ -342,6 +348,20 @@ def segy_to_mdio( # noqa: C901
342348 ... chunksize=(8, 2, 256, 512),
343349 ... grid_overrides={"HasDuplicates": True},
344350 ... )
351+
352+ >>> segy_to_mdio(
353+ ... segy_path="prefix/shot_file.segy",
354+ ... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
355+ ... index_bytes=(133, 171, 17, 137, 13),
356+ ... index_lengths=(2, 2, 4, 2, 4),
357+ ... index_names=("shot_line", "gun", "shot_point", "cable", "channel"),
358+ ... chunksize=(1, 1, 8, 1, 128, 1024),
359+ ... grid_overrides={
360+ ... "AutoShotWrap": True,
361+ ... "AutoChannelWrap": True,
362+ ... "AutoChannelTraceQC": 1000000
363+ ... },
364+ ... live_mask_chunksize=(1, 1, 8, 1, 128),
345365 """
346366 if index_names is None :
347367 index_names = [f"dim_{ i } " for i in range (len (index_bytes ))]
@@ -356,6 +376,14 @@ def segy_to_mdio( # noqa: C901
356376 f"equal to array dimensions={ len (index_bytes ) + 1 } " ,
357377 )
358378 raise ValueError (message )
379+
380+ if live_mask_chunksize is not None :
381+ if len (live_mask_chunksize ) != len (index_bytes ):
382+ message = (
383+ f"Length of live_mask_chunksize={ len (live_mask_chunksize )} must be " ,
384+ f"equal to array dimensions={ len (index_bytes )} " ,
385+ )
386+ raise ValueError (message )
359387
360388 # Handle storage options and check permissions etc
361389 if storage_options_input is None :
@@ -424,12 +452,15 @@ def segy_to_mdio( # noqa: C901
424452 trace_count = np .count_nonzero (grid .live_mask )
425453 write_attribute (name = "trace_count" , zarr_group = zarr_root , attribute = trace_count )
426454
455+ if live_mask_chunksize is None :
456+ live_mask_chunksize = - 1
457+
427458 # Note, live mask is not chunked by default (live_mask_chunksize=None) since it's bool and small.
428459 zarr_root ["metadata" ].create_dataset (
429460 data = grid .live_mask ,
430461 name = "live_mask" ,
431462 shape = grid .shape [:- 1 ],
432- chunks = - 1 ,
463+ chunks = live_mask_chunksize ,
433464 dimension_separator = "/" ,
434465 )
435466
0 commit comments