1414from mdio .constants import UINT32_MAX
1515from mdio .core import Grid
1616from mdio .segy .byte_utils import ByteOrder
17+ from mdio .segy .byte_utils import Dtype
18+ from mdio .segy .ibm_float import ibm2ieee
1719
1820
1921def header_scan_worker (
2022 segy_path_or_handle : str | segyio .SegyFile ,
2123 trace_range : Sequence [int ],
2224 byte_locs : Sequence [int ],
23- byte_lengths : Sequence [int ],
25+ byte_types : Sequence [Dtype ],
26+ index_names : Sequence [str ],
2427 segy_endian : str ,
25- ) -> ArrayLike :
28+ ) -> dict [ str , ArrayLike ] :
2629 """Header scan worker.
2730
2831 Can accept file path or segyio.SegyFile.
@@ -36,9 +39,9 @@ def header_scan_worker(
3639 Args:
3740 segy_path_or_handle: Path or handle to the input SEG-Y file
3841 byte_locs: Byte locations to return. It will be a subset of the headers.
39- byte_lengths: Tuple consisting of the byte lengths for the index
40- attributes. None sets it to 4 per index
42+ byte_types: Tuple consisting of the data types for the index attributes.
4143 trace_range: Tuple consisting of the trace ranges to read
44+ index_names: Tuple of the names for the index attributes
4245 segy_endian: Endianness of the input SEG-Y. Rev.2 allows little endian
4346
4447 Returns:
@@ -77,14 +80,14 @@ def header_scan_worker(
7780 # Pads the rest of the data with voids.
7881 endian = ByteOrder [segy_endian .upper ()]
7982
80- # Handle byte locations and word lengths that are not specified for numpy struct
81- lengths = [4 if length is None else length for length in byte_lengths ]
83+ # Handle byte offsets
8284 offsets = [0 if byte_loc is None else byte_loc - 1 for byte_loc in byte_locs ]
85+ formats = [type_ .numpy_dtype .newbyteorder (endian ) for type_ in byte_types ]
8386
8487 struct_dtype = np .dtype (
8588 {
86- "names" : [ f"dim_ { idx } " for idx in range ( len ( byte_locs ))] ,
87- "formats" : [ endian + "i" + str ( length ) for length in lengths ] ,
89+ "names" : index_names ,
90+ "formats" : formats ,
8891 "offsets" : offsets ,
8992 "itemsize" : 240 ,
9093 }
@@ -95,17 +98,37 @@ def header_scan_worker(
9598 block_headers = b"" .join ([trace_headers .buf for trace_headers in block_headers ])
9699 n_traces = stop - start
97100 block_headers = np .frombuffer (block_headers , struct_dtype , count = n_traces )
98- block_headers = [ block_headers [dim ] for dim in block_headers . dtype . names ]
101+ block_headers = { name : block_headers [name ] for name in index_names }
99102
100- block_headers = np .column_stack (block_headers )
103+ out_dtype = []
104+ for name , type_ in zip (index_names , byte_types ): # noqa: B905
105+ if type_ == Dtype .IBM32 :
106+ native_dtype = Dtype .FLOAT32 .numpy_dtype
107+ else :
108+ native_dtype = type_ .numpy_dtype
101109
102- if None in byte_locs :
103- # Zero out the junk we read for `None` byte locations.
104- # We could have multiple None values.
105- none_idx = tuple (i for i , val in enumerate (byte_locs ) if val is None )
106- block_headers [:, none_idx ] = 0
110+ out_dtype .append ((name , native_dtype ))
107111
108- return block_headers
112+ out_array = np .empty (n_traces , out_dtype )
113+
114+ # TODO: Add strict=True and remove noqa when minimum Python is 3.10
115+ for name , loc , type_ in zip (index_names , byte_locs , byte_types ): # noqa: B905
116+ # Handle exception when a byte_loc is None
117+ if loc is None :
118+ out_array [name ] = 0
119+ del block_headers [name ]
120+ continue
121+
122+ header = block_headers [name ]
123+
124+ if type_ == Dtype .IBM32 :
125+ header = ibm2ieee (header )
126+
127+ out_array [name ] = header
128+
129+ del block_headers [name ]
130+
131+ return out_array
109132
110133
111134def trace_worker (
0 commit comments