1212import zarr
1313from segy import SegyFile
1414from segy .config import SegyFileSettings
15- from segy .standards .codes import MeasurementSystem as segy_MeasurementSystem
16- from segy .standards .fields .trace import Rev0 as TraceHeaderFieldsRev0
15+ from segy .config import SegyHeaderOverrides
16+ from segy .standards .codes import MeasurementSystem as SegyMeasurementSystem
17+ from segy .standards .fields import binary as binary_header_fields
1718
1819from mdio .api .io import _normalize_path
1920from mdio .api .io import to_mdio
3637from mdio .core .utils_write import MAX_SIZE_LIVE_MASK
3738from mdio .core .utils_write import get_constrained_chunksize
3839from mdio .segy import blocked_io
40+ from mdio .segy .scalar import SCALE_COORDINATE_KEYS
41+ from mdio .segy .scalar import _apply_coordinate_scalar
42+ from mdio .segy .scalar import _get_coordinate_scalar
3943from mdio .segy .utilities import get_grid_plan
4044
4145if TYPE_CHECKING :
5458logger = logging .getLogger (__name__ )
5559
5660
61+ MEASUREMENT_SYSTEM_KEY = binary_header_fields .Rev0 .MEASUREMENT_SYSTEM_CODE .model .name
62+
63+
5764def grid_density_qc (grid : Grid , num_traces : int ) -> None :
5865 """Quality control for sensible grid density during SEG-Y to MDIO conversion.
5966
@@ -269,6 +276,7 @@ def populate_non_dim_coordinates(
269276 grid : Grid ,
270277 coordinates : dict [str , SegyHeaderArray ],
271278 drop_vars_delayed : list [str ],
279+ horizontal_coordinate_scalar : int ,
272280) -> tuple [xr_Dataset , list [str ]]:
273281 """Populate the xarray dataset with coordinate variables."""
274282 non_data_domain_dims = grid .dim_names [:- 1 ] # minus the data domain dimension
@@ -282,6 +290,10 @@ def populate_non_dim_coordinates(
282290
283291 not_null = coord_trace_indices != grid .map .fill_value
284292 tmp_coord_values [not_null ] = coord_values [coord_trace_indices [not_null ]]
293+
294+ if coord_name in SCALE_COORDINATE_KEYS :
295+ tmp_coord_values = _apply_coordinate_scalar (tmp_coord_values , horizontal_coordinate_scalar )
296+
285297 dataset [coord_name ][:] = tmp_coord_values
286298 drop_vars_delayed .append (coord_name )
287299
@@ -291,16 +303,22 @@ def populate_non_dim_coordinates(
291303 return dataset , drop_vars_delayed
292304
293305
def _get_horizontal_coordinate_unit(segy_info: SegyFileHeaderDump) -> LengthUnitModel | None:
    """Get the horizontal coordinate unit from the SEG-Y binary header.

    Args:
        segy_info: Header dump of the SEG-Y file. Its ``binary_header_dict`` is read at
            ``MEASUREMENT_SYSTEM_KEY`` and the value is converted with ``int``.

    Returns:
        A length unit model set to meters or feet, or None (after logging a warning) when the
        measurement system code matches neither.
    """
    measurement_system_code = int(segy_info.binary_header_dict[MEASUREMENT_SYSTEM_KEY])

    # Branch on the enum members themselves instead of a separate hard-coded (1, 2) guard, so the
    # set of accepted codes and the unit mapping cannot drift apart and `unit` is always bound
    # before it is used.
    if measurement_system_code == SegyMeasurementSystem.METERS:
        unit = LengthUnitEnum.METER
    elif measurement_system_code == SegyMeasurementSystem.FEET:
        unit = LengthUnitEnum.FOOT
    else:
        logger.warning(
            "Unexpected value in coordinate unit (%s) header: %s. Can't extract coordinate unit and will "
            "ingest without coordinate units.",
            MEASUREMENT_SYSTEM_KEY,
            measurement_system_code,
        )
        return None

    return LengthUnitModel(length=unit)
@@ -310,6 +328,7 @@ def _populate_coordinates(
310328 dataset : xr_Dataset ,
311329 grid : Grid ,
312330 coords : dict [str , SegyHeaderArray ],
331+ horizontal_coordinate_scalar : int ,
313332) -> tuple [xr_Dataset , list [str ]]:
314333 """Populate dim and non-dim coordinates in the xarray dataset and write to Zarr.
315334
@@ -319,6 +338,7 @@ def _populate_coordinates(
319338 dataset: The xarray dataset to populate.
320339 grid: The grid object containing the grid map.
321340 coords: The non-dim coordinates to populate.
341+ horizontal_coordinate_scalar: The X/Y coordinate scalar from the SEG-Y file.
322342
323343 Returns:
324344 Xarray dataset with filled coordinates and updated variables to drop after writing
@@ -329,7 +349,11 @@ def _populate_coordinates(
329349
330350 # Populate the non-dimension coordinate variables (N-dim arrays)
331351 dataset , vars_to_drop_later = populate_non_dim_coordinates (
332- dataset , grid , coordinates = coords , drop_vars_delayed = drop_vars_delayed
352+ dataset ,
353+ grid ,
354+ coordinates = coords ,
355+ drop_vars_delayed = drop_vars_delayed ,
356+ horizontal_coordinate_scalar = horizontal_coordinate_scalar ,
333357 )
334358
335359 return dataset , drop_vars_delayed
@@ -465,6 +489,7 @@ def segy_to_mdio( # noqa PLR0913
465489 output_path : UPath | Path | str ,
466490 overwrite : bool = False ,
467491 grid_overrides : dict [str , Any ] | None = None ,
492+ segy_header_overrides : SegyHeaderOverrides | None = None ,
468493) -> None :
469494 """A function that converts a SEG-Y file to an MDIO v1 file.
470495
@@ -477,6 +502,7 @@ def segy_to_mdio( # noqa PLR0913
477502 output_path: The universal path for the output MDIO v1 file.
478503 overwrite: Whether to overwrite the output file if it already exists. Defaults to False.
479504 grid_overrides: Option to add grid overrides.
505+ segy_header_overrides: Option to override specific SEG-Y headers during ingestion.
480506
481507 Raises:
482508 FileExistsError: If the output location already exists and overwrite is False.
@@ -489,7 +515,12 @@ def segy_to_mdio( # noqa PLR0913
489515 raise FileExistsError (err )
490516
491517 segy_settings = SegyFileSettings (storage_options = input_path .storage_options )
492- segy_file = SegyFile (url = input_path .as_posix (), spec = segy_spec , settings = segy_settings )
518+ segy_file = SegyFile (
519+ url = input_path .as_posix (),
520+ spec = segy_spec ,
521+ settings = segy_settings ,
522+ header_overrides = segy_header_overrides ,
523+ )
493524 segy_info : SegyFileHeaderDump = _get_segy_file_header_dump (segy_file )
494525
495526 segy_dimensions , segy_headers = _scan_for_headers (segy_file , mdio_template , grid_overrides )
@@ -506,7 +537,7 @@ def segy_to_mdio( # noqa PLR0913
506537 logger .warning ("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed." )
507538 mdio_template = _add_raw_headers_to_template (mdio_template )
508539
509- horizontal_unit = _get_horizontal_coordinate_unit (segy_dimensions )
540+ horizontal_unit = _get_horizontal_coordinate_unit (segy_info )
510541 mdio_ds : Dataset = mdio_template .build_dataset (
511542 name = mdio_template .name ,
512543 sizes = grid .shape ,
@@ -523,10 +554,12 @@ def segy_to_mdio( # noqa PLR0913
523554
524555 xr_dataset : xr_Dataset = to_xarray_dataset (mdio_ds = mdio_ds )
525556
557+ coordinate_scalar = _get_coordinate_scalar (segy_file )
526558 xr_dataset , drop_vars_delayed = _populate_coordinates (
527559 dataset = xr_dataset ,
528560 grid = grid ,
529561 coords = non_dim_coords ,
562+ horizontal_coordinate_scalar = coordinate_scalar ,
530563 )
531564
532565 xr_dataset = _add_segy_file_headers (xr_dataset , segy_info )
0 commit comments