| 
14 | 14 | 
 
  | 
15 | 15 | from mdio.api.io import _normalize_path  | 
16 | 16 | from mdio.api.io import to_mdio  | 
 | 17 | +from mdio.builder.schemas.chunk_grid import RegularChunkGrid  | 
 | 18 | +from mdio.builder.schemas.chunk_grid import RegularChunkShape  | 
17 | 19 | from mdio.builder.schemas.v1.units import LengthUnitEnum  | 
18 | 20 | from mdio.builder.schemas.v1.units import LengthUnitModel  | 
 | 21 | +from mdio.builder.schemas.v1.variable import VariableMetadata  | 
19 | 22 | from mdio.builder.xarray_builder import to_xarray_dataset  | 
20 | 23 | from mdio.converters.exceptions import EnvironmentFormatError  | 
21 | 24 | from mdio.converters.exceptions import GridTraceCountError  | 
22 | 25 | from mdio.converters.exceptions import GridTraceSparsityError  | 
23 | 26 | from mdio.converters.type_converter import to_structured_type  | 
24 | 27 | from mdio.core.grid import Grid  | 
 | 28 | +from mdio.core.utils_write import MAX_COORDINATES_BYTES  | 
 | 29 | +from mdio.core.utils_write import MAX_SIZE_LIVE_MASK  | 
 | 30 | +from mdio.core.utils_write import get_constrained_chunksize  | 
25 | 31 | from mdio.segy import blocked_io  | 
26 | 32 | from mdio.segy.utilities import get_grid_plan  | 
27 | 33 | 
 
  | 
@@ -330,6 +336,36 @@ def _add_segy_ingest_attributes(dataset: Dataset, segy_file: SegyFile, grid_over  | 
330 | 336 |     dataset.metadata.attributes.update(segy_attributes)  | 
331 | 337 | 
 
  | 
332 | 338 | 
 
  | 
 | 339 | +def _chunk_variable(ds: Dataset, variable_name: str) -> None:  | 
 | 340 | +    """Determines the chunking for a Varible in the Dataset."""  | 
 | 341 | +    idx = -1  | 
 | 342 | +    for i in range(len(ds.variables)):  | 
 | 343 | +        if ds.variables[i].name == variable_name:  | 
 | 344 | +            idx = i  | 
 | 345 | +            break  | 
 | 346 | + | 
 | 347 | +    def determine_target_size(var_type: str) -> int:  | 
 | 348 | +        """Determines the target size (in bytes) for a Variable based on its type."""  | 
 | 349 | +        if var_type == "bool":  | 
 | 350 | +            return MAX_SIZE_LIVE_MASK  | 
 | 351 | +        return MAX_COORDINATES_BYTES  | 
 | 352 | + | 
 | 353 | +    # Create the chunk grid metadata  | 
 | 354 | +    var_type = ds.variables[idx].data_type  | 
 | 355 | +    full_shape = tuple(dim.size for dim in ds.variables[idx].dimensions)  | 
 | 356 | +    target_size = determine_target_size(var_type)  | 
 | 357 | + | 
 | 358 | +    chunk_shape = get_constrained_chunksize(full_shape, var_type, target_size)  | 
 | 359 | +    chunks = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))  | 
 | 360 | + | 
 | 361 | +    # Update the variable's metadata with the new chunk grid  | 
 | 362 | +    if ds.variables[idx].metadata is None:  | 
 | 363 | +        # ds.variables[idx].metadata = VariableMetadata(chunk_shape=chunks.chunk_shape)  | 
 | 364 | +        ds.variables[idx].metadata = VariableMetadata(chunk_grid=chunks)  | 
 | 365 | +    else:  | 
 | 366 | +        ds.variables[idx].metadata.chunk_grid = chunks  | 
 | 367 | + | 
 | 368 | + | 
333 | 369 | def segy_to_mdio(  # noqa PLR0913  | 
334 | 370 |     segy_spec: SegySpec,  | 
335 | 371 |     mdio_template: AbstractDatasetTemplate,  | 
@@ -379,6 +415,10 @@ def segy_to_mdio(  # noqa PLR0913  | 
379 | 415 | 
 
  | 
380 | 416 |     _add_segy_ingest_attributes(dataset=mdio_ds, segy_file=segy_file, grid_overrides=grid_overrides)  | 
381 | 417 | 
 
  | 
 | 418 | +    _chunk_variable(ds=mdio_ds, variable_name="trace_mask")  # trace_mask is a Variable and not a Coordinate  | 
 | 419 | +    for coord in mdio_template.coordinate_names:  | 
 | 420 | +        _chunk_variable(ds=mdio_ds, variable_name=coord)  | 
 | 421 | + | 
382 | 422 |     xr_dataset: xr_Dataset = to_xarray_dataset(mdio_ds=mdio_ds)  | 
383 | 423 | 
 
  | 
384 | 424 |     xr_dataset, drop_vars_delayed = _populate_coordinates(  | 
 | 
0 commit comments