@@ -293,41 +293,40 @@ def _populate_coordinates(
293293 return dataset , drop_vars_delayed
294294
295295
296- def _add_segy_ingest_attributes (dataset : Dataset , segy_file : SegyFile , grid_overrides : dict [str , Any ] | None ) -> None :
297- text_header = segy_file .text_header .splitlines ()
298- # Validate:
299- # text_header this should be a 40-items array of strings with width of 80 characters.
300- item_count = 40
301- if len (text_header ) != item_count :
302- err = f"Invalid text header count: expected { item_count } , got { len (text_header )} "
def _add_segy_file_headers(xr_dataset: xr_Dataset, segy_file: SegyFile) -> xr_Dataset:
    """Validate the SEG-Y text header and attach the file headers to the dataset.

    The text header is split into lines and must have exactly 40 rows of
    80 characters each. On success a scalar ``segy_file_header`` variable is
    created on ``xr_dataset`` and the raw text header plus the binary header
    (as a dict) are stored in its attributes.

    Args:
        xr_dataset: Dataset that receives the ``segy_file_header`` variable.
        segy_file: Object exposing ``text_header`` (a string) and
            ``binary_header`` (with a ``to_dict()`` method).

    Returns:
        The same ``xr_dataset`` object, with the header variable added.

    Raises:
        ValueError: If the text header does not have 40 rows, or if any row
            is not exactly 80 characters wide.
    """
    expected_rows = 40
    expected_cols = 80

    text_header = segy_file.text_header
    text_header_rows = text_header.splitlines()

    if len(text_header_rows) != expected_rows:
        # Report the number of rows; ``len(text_header)`` would be the
        # character count of the raw string and mislead the reader.
        err = f"Invalid text header count: expected {expected_rows}, got {len(text_header_rows)}"
        raise ValueError(err)

    # Only check widths once the row count is known to be valid, and name the
    # first offending line so the failure is actionable.
    for row_index, row in enumerate(text_header_rows):
        if len(row) != expected_cols:
            err = (
                f"Invalid text header columns: expected {expected_cols} per line. "
                f"Line {row_index} has {len(row)}."
            )
            raise ValueError(err)

    # Scalar placeholder variable whose attributes carry the file headers.
    xr_dataset["segy_file_header"] = ((), "")
    xr_dataset["segy_file_header"].attrs.update(
        {
            "textHeader": text_header,
            "binaryHeader": segy_file.binary_header.to_dict(),
        }
    )

    return xr_dataset
321+
316322
317- # Handle case where it may not have any metadata yet
def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, Any] | None) -> None:
    """Record the grid overrides in the dataset's metadata attributes.

    The metadata attribute mapping is initialised to an empty dict when it is
    ``None``. When ``grid_overrides`` is provided it is stored under the
    ``"gridOverrides"`` key; otherwise the attributes are left unchanged.

    Args:
        dataset: Object whose ``metadata.attributes`` mapping is updated in place.
        grid_overrides: Overrides to record, or ``None`` to record nothing.
    """
    # The attribute mapping may not exist yet; create it before any write.
    if dataset.metadata.attributes is None:
        dataset.metadata.attributes = {}

    if grid_overrides is None:
        return

    dataset.metadata.attributes["gridOverrides"] = grid_overrides
331330
332331
333332def segy_to_mdio ( # noqa PLR0913
@@ -377,7 +376,7 @@ def segy_to_mdio( # noqa PLR0913
377376 header_dtype = header_dtype ,
378377 )
379378
380- _add_segy_ingest_attributes (dataset = mdio_ds , segy_file = segy_file , grid_overrides = grid_overrides )
379+ _add_grid_override_to_metadata (dataset = mdio_ds , grid_overrides = grid_overrides )
381380
382381 xr_dataset : xr_Dataset = to_xarray_dataset (mdio_ds = mdio_ds )
383382
@@ -387,8 +386,9 @@ def segy_to_mdio( # noqa PLR0913
387386 coords = non_dim_coords ,
388387 )
389388
390- xr_dataset . trace_mask . data [:] = grid . live_mask
389+ xr_dataset = _add_segy_file_headers ( xr_dataset , segy_file )
391390
391+ xr_dataset .trace_mask .data [:] = grid .live_mask
392392 # IMPORTANT: Do not drop the "trace_mask" here, as it will be used later in
393393 # blocked_io.to_zarr() -> _workers.trace_worker()
394394
0 commit comments