|
28 | 28 | logger = logging.getLogger(__name__) |
29 | 29 |
|
30 | 30 |
|
| 31 | +def _filter_raw_unspecified_fields(headers: NDArray) -> NDArray: |
| 32 | + """Filter out __MDIO_RAW_UNSPECIFIED_Field_* fields from headers array. |
| 33 | + |
| 34 | + These fields are added during SEGY import to preserve raw header bytes, |
| 35 | + but they cause dtype mismatches during export. This function removes them. |
| 36 | + |
| 37 | + Args: |
| 38 | + headers: Header array that may contain raw unspecified fields. |
| 39 | + |
| 40 | + Returns: |
| 41 | + Header array with raw unspecified fields removed. |
| 42 | + """ |
| 43 | + if headers.dtype.names is None: |
| 44 | + return headers |
| 45 | + |
| 46 | + # Find field names that don't start with __MDIO_RAW_UNSPECIFIED_ |
| 47 | + field_names = [name for name in headers.dtype.names |
| 48 | + if not name.startswith("__MDIO_RAW_UNSPECIFIED_")] |
| 49 | + |
| 50 | + if len(field_names) == len(headers.dtype.names): |
| 51 | + # No raw unspecified fields found, return as-is |
| 52 | + return headers |
| 53 | + |
| 54 | + # Create new structured array with only the non-raw fields |
| 55 | + new_dtype = [(name, headers.dtype.fields[name][0]) for name in field_names] |
| 56 | + filtered_headers = np.empty(headers.shape, dtype=new_dtype) |
| 57 | + |
| 58 | + for name in field_names: |
| 59 | + filtered_headers[name] = headers[name] |
| 60 | + |
| 61 | + return filtered_headers |
| 62 | + |
| 63 | + |
31 | 64 | def make_segy_factory(spec: SegySpec, binary_header: dict[str, int]) -> SegyFactory: |
32 | 65 | """Generate SEG-Y factory from MDIO metadata.""" |
33 | 66 | sample_interval = binary_header["sample_interval"] |
@@ -167,7 +200,9 @@ def serialize_to_segy_stack( # noqa: PLR0913 |
167 | 200 | samples = samples[live_mask] |
168 | 201 | headers = headers[live_mask] |
169 | 202 |
|
170 | | - buffer = segy_factory.create_traces(headers, samples) |
| 203 | + # Filter out raw unspecified fields that cause dtype mismatches |
| 204 | + filtered_headers = _filter_raw_unspecified_fields(headers) |
| 205 | + buffer = segy_factory.create_traces(filtered_headers, samples) |
171 | 206 |
|
172 | 207 | global_index = block_start[0] |
173 | 208 | record_id_str = str(global_index) |
@@ -199,7 +234,9 @@ def serialize_to_segy_stack( # noqa: PLR0913 |
199 | 234 | rec_samples = samples[rec_index][rec_live_mask] |
200 | 235 | rec_headers = headers[rec_index][rec_live_mask] |
201 | 236 |
|
202 | | - buffer = segy_factory.create_traces(rec_headers, rec_samples) |
| 237 | + # Filter out raw unspecified fields that cause dtype mismatches |
| 238 | + filtered_headers = _filter_raw_unspecified_fields(rec_headers) |
| 239 | + buffer = segy_factory.create_traces(filtered_headers, rec_samples) |
203 | 240 |
|
204 | 241 | global_index = tuple(block_start[i] + rec_index[i] for i in range(record_ndim)) |
205 | 242 | record_id_str = "/".join(map(str, global_index)) |
|
0 commit comments