Skip to content

Commit e0ba0b0

Browse files
committed
Begin on-the-fly chunk size calculations for trace_mask and non-dimension coordinates
1 parent fcffad8 commit e0ba0b0

File tree

1 file changed

+36
-0
lines changed

1 file changed

+36
-0
lines changed

src/mdio/converters/segy.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,39 @@ def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None:
311311
}
312312
)
313313

314+
def _chunk_variable(ds: Dataset, grid: Grid, variable_name: str) -> None:
    """Attach a regular chunk grid to the named variable of ``ds``, in place.

    The chunk shape is computed by ``get_constrained_chunksize`` from the shape
    of ``grid.live_mask``, the variable's data type, and a target size:
    ``MAX_SIZE_LIVE_MASK`` when the data type equals ``"bool"``, and
    ``128 * 1024**2`` otherwise.

    Args:
        ds: Dataset whose ``variables`` holds the variable to update.
        grid: Grid whose ``live_mask`` shape is passed as the array shape.
        variable_name: Name of the variable to receive the chunk grid.

    Raises:
        ValueError: If ``ds`` has no variable named ``variable_name``.
    """
    from mdio.core.utils_write import MAX_SIZE_LIVE_MASK
    from mdio.core.utils_write import get_constrained_chunksize
    from mdio.schemas.chunk_grid import RegularChunkGrid
    from mdio.schemas.chunk_grid import RegularChunkShape
    from mdio.schemas.metadata import ChunkGridMetadata
    from mdio.schemas.v1.variable import VariableMetadata

    # Fallback target for non-boolean variables.
    # NOTE(review): presumably a byte budget (128 MiB) -- confirm against
    # get_constrained_chunksize's third parameter.
    default_target_size = 128 * 1024**2

    # Find the first variable with a matching name.
    variable = next((v for v in ds.variables if v.name == variable_name), None)
    if variable is None:
        raise ValueError(f"Variable '{variable_name}' not found in dataset.")

    data_type = variable.data_type
    target_size = MAX_SIZE_LIVE_MASK if data_type == "bool" else default_target_size

    # NOTE(review): the live mask's shape is used for every variable, so this
    # assumes the variable spans the same dimensions as grid.live_mask --
    # confirm for coordinates defined over fewer dimensions.
    chunk_shape = get_constrained_chunksize(grid.live_mask.shape, data_type, target_size)
    chunks = ChunkGridMetadata(
        chunk_grid=RegularChunkGrid(
            configuration=RegularChunkShape(chunk_shape=chunk_shape),
        ),
    )

    if variable.metadata is None:
        # No metadata yet: create it carrying only the chunk grid.
        variable.metadata = VariableMetadata(chunk_grid=chunks.chunk_grid)
    else:
        # Keep the existing metadata and replace only its chunk grid.
        variable.metadata.chunk_grid = chunks.chunk_grid
314347

315348
def segy_to_mdio(
316349
segy_spec: SegySpec,
@@ -359,6 +392,9 @@ def segy_to_mdio(
359392
)
360393

361394
_add_text_binary_headers(dataset=mdio_ds, segy_file=segy_file)
395+
_chunk_variable(ds=mdio_ds, grid=grid, variable_name="trace_mask")
396+
for coord in mdio_template.coordinate_names:
397+
_chunk_variable(ds=mdio_ds, grid=grid, variable_name=coord)
362398

363399
xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds)
364400

0 commit comments

Comments
 (0)