1313
1414from mdio .api .io import to_mdio
1515from mdio .builder .schemas .dtype import ScalarType
16+ from mdio .segy ._disaster_recovery_wrapper import get_header_raw_and_transformed
1617
1718if TYPE_CHECKING :
1819 from segy .arrays import HeaderArray
@@ -81,7 +82,6 @@ def header_scan_worker(
8182
8283 return cast ("HeaderArray" , trace_header )
8384
84-
8585def trace_worker ( # noqa: PLR0913
8686 segy_kw : SegyFileArguments ,
8787 output_path : UPath ,
@@ -122,26 +122,31 @@ def trace_worker( # noqa: PLR0913
122122 zarr_config .set ({"threading.max_workers" : 1 })
123123
124124 live_trace_indexes = local_grid_map [not_null ].tolist ()
125- traces = segy_file .trace [live_trace_indexes ]
126125
127126 header_key = "headers"
128127 raw_header_key = "raw_headers"
129128
129+ # Used to disable the reverse transforms if we aren't going to write the raw headers
130+ do_reverse_transforms = False
131+
130132 # Get subset of the dataset that has not yet been saved
131133 # The headers might not be present in the dataset
132134 worker_variables = [data_variable_name ]
133135 if header_key in dataset .data_vars : # Keeping the `if` here to allow for more worker configurations
134136 worker_variables .append (header_key )
135137 if raw_header_key in dataset .data_vars :
138+
139+ do_reverse_transforms = True
136140 worker_variables .append (raw_header_key )
137141
142+ raw_headers , transformed_headers , traces = get_header_raw_and_transformed (segy_file , live_trace_indexes , do_reverse_transforms = do_reverse_transforms )
138143 ds_to_write = dataset [worker_variables ]
139144
140145 if header_key in worker_variables :
141146 # TODO(BrianMichell): Implement this better so that we can enable fill values without changing the code
142147 # https://github.com/TGSAI/mdio-python/issues/584
143148 tmp_headers = np .zeros_like (dataset [header_key ])
144- tmp_headers [not_null ] = traces . header
149+ tmp_headers [not_null ] = transformed_headers
145150 # Create a new Variable object to avoid copying the temporary array
146151 # The ideal solution is to use `ds_to_write[header_key][:] = tmp_headers`
147152 # but Xarray appears to be copying memory instead of doing direct assignment.
@@ -153,41 +158,19 @@ def trace_worker( # noqa: PLR0913
153158 attrs = ds_to_write [header_key ].attrs ,
154159 encoding = ds_to_write [header_key ].encoding , # Not strictly necessary, but safer than not doing it.
155160 )
161+ del transformed_headers # Manage memory
156162 if raw_header_key in worker_variables :
157163 tmp_raw_headers = np .zeros_like (dataset [raw_header_key ])
158-
159- # Get the indices where we need to place results
160- live_mask = not_null
161- live_positions = np .where (live_mask .ravel ())[0 ]
162-
163- if len (live_positions ) > 0 :
164- # Calculate byte ranges for headers
165- header_size = 240
166- trace_offset = segy_file .spec .trace .offset
167- trace_itemsize = segy_file .spec .trace .itemsize
168-
169- starts = []
170- ends = []
171- for global_trace_idx in live_trace_indexes :
172- header_start = trace_offset + global_trace_idx * trace_itemsize
173- header_end = header_start + header_size
174- starts .append (header_start )
175- ends .append (header_end )
176-
177- # Capture raw bytes
178- raw_header_bytes = merge_cat_file (segy_file .fs , segy_file .url , starts , ends )
179-
180- # Convert and place results
181- raw_headers_array = np .frombuffer (bytes (raw_header_bytes ), dtype = "|V240" )
182- tmp_raw_headers .ravel ()[live_positions ] = raw_headers_array
164+ tmp_raw_headers [not_null ] = raw_headers .view ("|V240" )
183165
184166 ds_to_write [raw_header_key ] = Variable (
185167 ds_to_write [raw_header_key ].dims ,
186168 tmp_raw_headers ,
187169 attrs = ds_to_write [raw_header_key ].attrs ,
188- encoding = ds_to_write [raw_header_key ].encoding ,
189- )
170+ encoding = ds_to_write [raw_header_key ].encoding , # Not strictly necessary, but safer than not doing it.
171+
190172
173+ del raw_headers # Manage memory
191174 data_variable = ds_to_write [data_variable_name ]
192175 fill_value = _get_fill_value (ScalarType (data_variable .dtype .name ))
193176 tmp_samples = np .full_like (data_variable , fill_value = fill_value )
0 commit comments