@@ -311,6 +311,39 @@ def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None:
311311        }
312312    )
313313
def _chunk_variable(ds: Dataset, grid: Grid, variable_name: str) -> None:
    """Set the regular chunk grid of a named dataset variable, in place.

    The variable called ``variable_name`` is looked up in ``ds.variables``. A chunk
    shape is computed by ``get_constrained_chunksize`` from the shape of
    ``grid.live_mask``, the variable's data type and a target chunk size, and the
    resulting chunk grid is stored on the variable's metadata. Metadata is created
    when the variable has none; otherwise only its ``chunk_grid`` is overwritten.

    Args:
        ds: Dataset holding the variable; the matching variable is mutated.
        grid: Grid whose ``live_mask.shape`` is handed to the chunk size helper.
        variable_name: Name of the variable to update.

    Raises:
        ValueError: If no variable in ``ds`` has the given name.
    """
    # Imports stay at function scope, as in the original.
    # NOTE(review): presumably this avoids an import cycle -- confirm before hoisting.
    from mdio.core.utils_write import MAX_SIZE_LIVE_MASK
    from mdio.core.utils_write import get_constrained_chunksize
    from mdio.schemas.chunk_grid import RegularChunkGrid, RegularChunkShape
    from mdio.schemas.metadata import ChunkGridMetadata
    from mdio.schemas.v1.variable import VariableMetadata

    # NOTE(review): presumably a size in bytes (128 MiB) -- confirm against
    # get_constrained_chunksize.
    default_target_size = 128 * 1024**2

    # Find the variable by name (first match wins).
    variable = next((var for var in ds.variables if var.name == variable_name), None)
    if variable is None:
        raise ValueError(f"Variable '{variable_name}' not found in dataset.")

    # Boolean variables get their own size limit; everything else uses the default.
    data_type = variable.data_type
    target_size = MAX_SIZE_LIVE_MASK if data_type == "bool" else default_target_size

    # NOTE(review): the chunk shape is always derived from grid.live_mask.shape,
    # not from the variable's own dimensions -- confirm every variable passed
    # here shares that shape.
    chunk_shape = get_constrained_chunksize(grid.live_mask.shape, data_type, target_size)
    chunks = ChunkGridMetadata(
        chunk_grid=RegularChunkGrid(
            configuration=RegularChunkShape(chunk_shape=chunk_shape),
        ),
    )

    # Update the variable's metadata, creating it when absent.
    if variable.metadata is None:
        variable.metadata = VariableMetadata(chunk_grid=chunks.chunk_grid)
    else:
        variable.metadata.chunk_grid = chunks.chunk_grid
314347
315348def  segy_to_mdio (
316349    segy_spec : SegySpec ,
@@ -359,6 +392,9 @@ def segy_to_mdio(
359392    )
360393
361394    _add_text_binary_headers (dataset = mdio_ds , segy_file = segy_file )
395+     _chunk_variable (ds = mdio_ds , grid = grid , variable_name = "trace_mask" )
396+     for  coord  in  mdio_template .coordinate_names :
397+         _chunk_variable (ds = mdio_ds , grid = grid , variable_name = coord )
362398
363399    xr_dataset : xr_Dataset  =  to_xarray_dataset (mdio_ds = mdio_ds )
364400
0 commit comments