@@ -343,6 +343,8 @@ def segy_to_mdio(  # noqa: C901
         ...     grid_overrides={"HasDuplicates": True},
         ... )
     """
+    zarr.config.set({"default_zarr_format": 2, "write_empty_chunks": False})
+
     if index_names is None:
        index_names = [f"dim_{i}" for i in range(len(index_bytes))]
 
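The added `zarr.config.set` call uses zarr-python's process-wide configuration to keep writing the Zarr v2 on-disk format and to skip empty chunks. A minimal sketch of the same pattern, reusing the config keys from the diff (the `print` check is illustrative only):

import zarr

# Pin newly created stores to the Zarr v2 format and skip writing empty chunks,
# mirroring the call introduced in this commit.
zarr.config.set({"default_zarr_format": 2, "write_empty_chunks": False})

# The setting is global for the process; groups and arrays created afterwards
# pick it up automatically.
print(zarr.config.get("default_zarr_format"))  # expected: 2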
@@ -364,13 +366,8 @@ def segy_to_mdio(  # noqa: C901
     if storage_options_output is None:
         storage_options_output = {}
 
-    store = process_url(
-        url=mdio_path_or_buffer,
-        mode="w",
-        storage_options=storage_options_output,
-        memory_cache_size=0,  # Making sure disk caching is disabled,
-        disk_cache=False,  # Making sure disk caching is disabled
-    )
+    url = process_url(url=mdio_path_or_buffer, disk_cache=False)
+    root_group = zarr.open_group(url, mode="w", storage_options=storage_options_output)
 
     # Open SEG-Y with MDIO's SegySpec. Endianness will be inferred.
     mdio_spec = mdio_segy_spec()
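With caching removed from `process_url`, the writer now resolves a plain URL and hands store creation to zarr itself. A hedged sketch of that pattern with a hypothetical local path (for a remote URL such as "s3://bucket/file.mdio", `storage_options` would be forwarded to fsspec):

import zarr

# Create or overwrite a root group directly at the target location; no separate
# store object is constructed by the caller.
root_group = zarr.open_group("output/example.mdio", mode="w")
root_group.attrs["note"] = "hypothetical example store"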
@@ -406,42 +403,43 @@ def segy_to_mdio(  # noqa: C901
         logger.warning(f"Ingestion grid shape: {grid.shape}.")
         raise GridTraceCountError(np.sum(grid.live_mask), num_traces)
 
-    zarr_root = create_zarr_hierarchy(
-        store=store,
+    root_group = create_zarr_hierarchy(
+        root_group=root_group,
         overwrite=overwrite,
     )
 
     # Get UTC time, then add local timezone information offset.
     iso_datetime = datetime.now(timezone.utc).isoformat()
 
-    write_attribute(name="created", zarr_group=zarr_root, attribute=iso_datetime)
-    write_attribute(name="api_version", zarr_group=zarr_root, attribute=API_VERSION)
+    write_attribute(name="created", zarr_group=root_group, attribute=iso_datetime)
+    write_attribute(name="api_version", zarr_group=root_group, attribute=API_VERSION)
 
     dimensions_dict = [dim.to_dict() for dim in dimensions]
-    write_attribute(name="dimension", zarr_group=zarr_root, attribute=dimensions_dict)
+    write_attribute(name="dimension", zarr_group=root_group, attribute=dimensions_dict)
 
     # Write trace count
     trace_count = np.count_nonzero(grid.live_mask)
-    write_attribute(name="trace_count", zarr_group=zarr_root, attribute=trace_count)
+    write_attribute(name="trace_count", zarr_group=root_group, attribute=trace_count)
 
     # Note, live mask is not chunked since it's bool and small.
-    zarr_root["metadata"].create_dataset(
-        data=grid.live_mask,
+    live_mask_arr = root_group["metadata"].create_array(
         name="live_mask",
         shape=grid.shape[:-1],
-        chunks=-1,
-        dimension_separator="/",
+        chunks=grid.shape[:-1],
+        dtype="bool",
+        chunk_key_encoding={"name": "v2", "separator": "/"},
     )
+    live_mask_arr[...] = grid.live_mask[...]
 
     write_attribute(
         name="text_header",
-        zarr_group=zarr_root["metadata"],
+        zarr_group=root_group["metadata"],
         attribute=text_header.split("\n"),
     )
 
     write_attribute(
         name="binary_header",
-        zarr_group=zarr_root["metadata"],
+        zarr_group=root_group["metadata"],
         attribute=binary_header.to_dict(),
     )
 
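The live-mask block above follows the zarr-python 3 idiom where `Group.create_array` replaces the old `create_dataset(data=..., chunks=-1, dimension_separator="/")`: the array is created first (single chunk, boolean dtype, "/"-separated chunk keys via `chunk_key_encoding`) and the data is assigned afterwards. A self-contained sketch with toy shapes standing in for `grid.shape[:-1]` and `grid.live_mask`:

import numpy as np
import zarr

root_group = zarr.open_group("output/example.mdio", mode="w")  # hypothetical path
metadata_group = root_group.create_group("metadata")

live_mask = np.ones((10, 20), dtype=bool)  # stand-in for grid.live_mask

# One chunk covering the whole array, "/" as the chunk-key separator.
live_mask_arr = metadata_group.create_array(
    name="live_mask",
    shape=live_mask.shape,
    chunks=live_mask.shape,
    dtype="bool",
    chunk_key_encoding={"name": "v2", "separator": "/"},
)
live_mask_arr[...] = live_mask  # bulk assignment after creation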
@@ -470,8 +468,8 @@ def segy_to_mdio(  # noqa: C901
     stats = blocked_io.to_zarr(
         segy_file=segy,
         grid=grid,
-        data_root=zarr_root["data"],
-        metadata_root=zarr_root["metadata"],
+        data_root=root_group["data"],
+        metadata_root=root_group["metadata"],
         name="_".join(["chunked", suffix]),
         dtype="float32",
         chunks=chunksize,
@@ -480,17 +478,7 @@ def segy_to_mdio(  # noqa: C901
     )
 
     for key, value in stats.items():
-        write_attribute(name=key, zarr_group=zarr_root, attribute=value)
-
-    # Non-cached store for consolidating metadata.
-    # If caching is enabled the metadata may fall out of cache hence
-    # creating an incomplete `.zmetadata` file.
-    store_nocache = process_url(
-        url=mdio_path_or_buffer,
-        mode="r+",
-        storage_options=storage_options_output,
-        memory_cache_size=0,  # Making sure disk caching is disabled,
-        disk_cache=False,  # Making sure disk caching is disabled
-    )
+        write_attribute(name=key, zarr_group=root_group, attribute=value)
 
-    zarr.consolidate_metadata(store_nocache)
+    # Finalize Zarr for fast open
+    zarr.consolidate_metadata(root_group.store)
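Consolidation now reuses the store of the group that is already open, instead of re-resolving the URL through a second, non-cached `process_url` call. A brief sketch, assuming a hypothetical local store with one small array:

import zarr

root_group = zarr.open_group("output/example.mdio", mode="w")  # hypothetical path
root_group.create_array(name="data", shape=(8,), dtype="float32")

# Collect all group/array metadata into a single consolidated document so that
# later opens of the hierarchy need only one read.
zarr.consolidate_metadata(root_group.store)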