@@ -163,7 +163,7 @@ def _build_and_check_grid(segy_dimensions: list[Dimension], segy_file: SegyFile,
163163
164164
165165def _get_coordinates (
166- segy_dimensions : list [ Dimension ] ,
166+ grid : Grid ,
167167 segy_headers : SegyHeaderArray ,
168168 mdio_template : AbstractDatasetTemplate ,
169169) -> tuple [list [Dimension ], dict [str , SegyHeaderArray ]]:
@@ -174,7 +174,7 @@ def _get_coordinates(
174174 The last dimension is always the vertical domain dimension
175175
176176 Args:
177- segy_dimensions: List of of all SEG-Y dimensions .
177+ grid: Inferred MDIO grid for SEG-Y file .
178178 segy_headers: Headers read in from SEG-Y file.
179179 mdio_template: The MDIO template to use for the conversion.
180180
@@ -188,19 +188,15 @@ def _get_coordinates(
188188 - A dict of non-dimension coordinates (str: N-D arrays).
189189 """
190190 dimensions_coords = []
191- dim_names = [dim .name for dim in segy_dimensions ]
192191 for dim_name in mdio_template .dimension_names :
193- try :
194- dim_index = dim_names .index (dim_name )
195- except ValueError :
192+ if dim_name not in grid .dim_names :
196193 err = f"Dimension '{ dim_name } ' was not found in SEG-Y dimensions."
197- raise ValueError (err ) from err
198- dimensions_coords .append (segy_dimensions [ dim_index ] )
194+ raise ValueError (err )
195+ dimensions_coords .append (grid . select_dim ( dim_name ) )
199196
200197 non_dim_coords : dict [str , SegyHeaderArray ] = {}
201- available_headers = segy_headers .dtype .names
202198 for coord_name in mdio_template .coordinate_names :
203- if coord_name not in available_headers :
199+ if coord_name not in segy_headers . dtype . names :
204200 err = f"Coordinate '{ coord_name } ' not found in SEG-Y dimensions."
205201 raise ValueError (err )
206202 non_dim_coords [coord_name ] = segy_headers [coord_name ]
@@ -227,12 +223,14 @@ def populate_non_dim_coordinates(
227223 """Populate the xarray dataset with coordinate variables."""
228224 not_null = grid .map [:] != UINT32_MAX
229225 for c_name , c_values in coordinates .items ():
230- dataset [c_name ].values [not_null ] = c_values
226+ c_tmp_array = dataset [c_name ].values
227+ c_tmp_array [not_null ] = c_values
228+ dataset [c_name ][:] = c_tmp_array
231229 drop_vars_delayed .append (c_name )
232230 return dataset , drop_vars_delayed
233231
234232
235- def _get_horizontal_coordinate_unit (segy_headers : list [Dimension ]) -> LengthUnitEnum | None :
233+ def _get_horizontal_coordinate_unit (segy_headers : list [Dimension ]) -> AllUnits | None :
236234 """Get the coordinate unit from the SEG-Y headers."""
237235 name = TraceHeaderFieldsRev0 .COORDINATE_UNIT .name .upper ()
238236 unit_hdr = next ((c for c in segy_headers if c .name .upper () == name ), None )
@@ -347,15 +345,17 @@ def segy_to_mdio(
347345
348346 grid = _build_and_check_grid (segy_dimensions , segy_file , segy_headers )
349347
350- dimensions , non_dim_coords = _get_coordinates (segy_dimensions , segy_headers , mdio_template )
351- shape = [len (dim .coords ) for dim in dimensions ]
348+ dimensions , non_dim_coords = _get_coordinates (grid , segy_headers , mdio_template )
352349 # TODO(Altay): Turn this dtype into packed representation
353350 # https://github.com/TGSAI/mdio-python/issues/601
354351 headers = to_structured_type (segy_spec .trace .header .dtype )
355352
356353 horizontal_unit = _get_horizontal_coordinate_unit (segy_dimensions )
357354 mdio_ds : Dataset = mdio_template .build_dataset (
358- name = mdio_template .name , sizes = shape , horizontal_coord_unit = horizontal_unit , headers = headers
355+ name = mdio_template .name ,
356+ sizes = grid .shape ,
357+ horizontal_coord_unit = horizontal_unit ,
358+ headers = headers ,
359359 )
360360
361361 _add_text_binary_headers (dataset = mdio_ds , segy_file = segy_file )
@@ -376,18 +376,12 @@ def segy_to_mdio(
376376 # IMPORTANT: Do not drop the "trace_mask" here, as it will be used later in
377377 # blocked_io.to_zarr() -> _workers.trace_worker()
378378
379- # Write the xarray dataset to Zarr with as following:
380- # Populated arrays:
381- # - 1D dimensional coordinates
382- # - ND non-dimensional coordinates
383- # - ND trace_mask
384- # Empty arrays (will be populated later in chunks):
385- # - ND+1 traces
386- # - ND headers (no _FillValue set due to the bug https://github.com/TGSAI/mdio-python/issues/582)
387- # This will create the Zarr store with the correct structure
388- # TODO(Dmitriy Repin): do chunked write for non-dimensional coordinates and trace_mask
389- # https://github.com/TGSAI/mdio-python/issues/587
390- xr_dataset .to_zarr (store = output_location .uri , mode = "w" , write_empty_chunks = False , zarr_format = 2 , compute = True )
379+ # This will create the Zarr store with the correct structure but with empty arrays
380+ xr_dataset .to_zarr (store = output_location .uri , mode = "w" , write_empty_chunks = False , zarr_format = 2 , compute = False )
381+
382+ # This will write the non-dimension coordinates and trace mask
383+ meta_ds = xr_dataset [drop_vars_delayed + ["trace_mask" ]]
384+ meta_ds .to_zarr (store = output_location .uri , mode = "r+" , write_empty_chunks = False , zarr_format = 2 , compute = True )
391385
392386 # Now we can drop them to simplify chunked write of the data variable
393387 xr_dataset = xr_dataset .drop_vars (drop_vars_delayed )
0 commit comments