Skip to content

Commit adc44d2

Browse files
committed
fix(regression): make dataset serialization less eager
1 parent 4cb2321 commit adc44d2

File tree

1 file changed

+6
-12
lines changed

1 file changed

+6
-12
lines changed

src/mdio/converters/segy.py

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -376,18 +376,12 @@ def segy_to_mdio(
376376
# IMPORTANT: Do not drop the "trace_mask" here, as it will be used later in
377377
# blocked_io.to_zarr() -> _workers.trace_worker()
378378

379-
# Write the xarray dataset to Zarr with as following:
380-
# Populated arrays:
381-
# - 1D dimensional coordinates
382-
# - ND non-dimensional coordinates
383-
# - ND trace_mask
384-
# Empty arrays (will be populated later in chunks):
385-
# - ND+1 traces
386-
# - ND headers (no _FillValue set due to the bug https://github.com/TGSAI/mdio-python/issues/582)
387-
# This will create the Zarr store with the correct structure
388-
# TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask
389-
# https://github.com/TGSAI/mdio-python/issues/587
390-
xr_dataset.to_zarr(store=output_location.uri, mode="w", write_empty_chunks=False, zarr_format=2, compute=True)
379+
# This will create the Zarr store with the correct structure but with empty arrays
380+
xr_dataset.to_zarr(store=output_location.uri, mode="w", write_empty_chunks=False, zarr_format=2, compute=False)
381+
382+
# This will write the non-dimension coordinates and trace mask
383+
meta_ds = xr_dataset[drop_vars_delayed + ["trace_mask"]]
384+
meta_ds.to_zarr(store=output_location.uri, mode="r+", write_empty_chunks=False, zarr_format=2, compute=True)
391385

392386
# Now we can drop them to simplify chunked write of the data variable
393387
xr_dataset = xr_dataset.drop_vars(drop_vars_delayed)

0 commit comments

Comments
 (0)