Skip to content

Commit e5305b8

Browse files
committed
Remove raw field additions. Depends on segy >= 0.5.1
1 parent ac31e18 commit e5305b8

File tree

2 files changed

+2
-71
lines changed

2 files changed

+2
-71
lines changed

src/mdio/converters/segy.py

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,12 @@
44

55
import logging
66
import os
7-
from copy import deepcopy
87
from typing import TYPE_CHECKING
98

109
import numpy as np
1110
import zarr
1211
from segy import SegyFile
1312
from segy.config import SegySettings
14-
from segy.schema import HeaderField
15-
from segy.schema import ScalarType as ScalarType2
1613
from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
1714
from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
1815

@@ -344,33 +341,6 @@ def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, A
344341
dataset.metadata.attributes["gridOverrides"] = grid_overrides
345342

346343

347-
def _scalar_to_size(scalar: ScalarType2) -> int:
    """Return the number of bytes occupied by a header field of the given scalar format.

    The width is derived from the textual form of ``scalar``: its last two
    characters are read as a bit count and converted to bytes.

    Args:
        scalar: Scalar format of a header field.

    Returns:
        8 for ``ScalarType2.STRING8``; otherwise the trailing two-character bit
        count divided by 8, or 1 when those two characters are not an integer.
    """
    # TODO(BrianMichell): #0000 Lazy way to support conversion.
    if scalar == ScalarType2.STRING8:
        return 8

    bit_suffix = str(scalar)[-2:]
    try:
        bit_count = int(bit_suffix)
    except ValueError:
        # The suffix is not a two-digit number (e.g. a letter followed by a
        # single digit), so fall back to a one-byte width.
        return 1
    return bit_count // 8
355-
356-
357-
def _customize_segy_spec(segy_spec: SegySpec) -> SegySpec:
    """Return a customized copy of ``segy_spec`` covering every unclaimed trace-header byte.

    Each byte offset in the trace header that no field of ``segy_spec`` occupies
    gets its own one-byte field named ``__MDIO_RAW_UNSPECIFIED_Field_{i}``, where
    ``i`` is the 0-based byte offset.

    Args:
        segy_spec: Spec whose trace header fields determine which bytes are
            already claimed. It is deep-copied; the customization is applied
            to the copy.

    Returns:
        The result of calling ``customize`` on the copy with the filler fields.
    """
    # Number of byte offsets scanned; SEG-Y trace headers are 240 bytes long.
    trace_header_size = 240

    ret = deepcopy(segy_spec)

    # A set keeps the membership test below O(1) per offset.
    assigned_bytes: set[int] = set()
    for field in segy_spec.trace.header.fields:
        # ``field.byte`` is treated as 1-based here; occupancy is tracked 0-based.
        start = field.byte - 1
        assigned_bytes.update(range(start, start + _scalar_to_size(field.format)))

    # NOTE(review): the filler format uses `ScalarType`, while field widths are
    # resolved through `_scalar_to_size`, which is annotated with `ScalarType2`
    # -- confirm the two enums are interchangeable for `HeaderField`.
    field_to_customize = [
        HeaderField(name=f"__MDIO_RAW_UNSPECIFIED_Field_{i}", format=ScalarType.UINT8, byte=i + 1)
        for i in range(trace_header_size)
        if i not in assigned_bytes
    ]
    return ret.customize(trace_header_fields=field_to_customize)
372-
373-
374344
def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
375345
"""Add raw headers capability to the MDIO template by monkey-patching its _add_variables method.
376346
@@ -451,9 +421,6 @@ def segy_to_mdio( # noqa PLR0913
451421
input_path = _normalize_path(input_path)
452422
output_path = _normalize_path(output_path)
453423

454-
if os.getenv("MDIO__DO_RAW_HEADERS") == "1":
455-
segy_spec = _customize_segy_spec(segy_spec)
456-
457424
if not overwrite and output_path.exists():
458425
err = f"Output location '{output_path.as_posix()}' exists. Set `overwrite=True` if intended."
459426
raise FileExistsError(err)

src/mdio/segy/creation.py

Lines changed: 2 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,38 +28,6 @@
2828
logger = logging.getLogger(__name__)
2929

3030

31-
def _filter_raw_unspecified_fields(headers: NDArray) -> NDArray:
32-
"""Filter out __MDIO_RAW_UNSPECIFIED_Field_* fields from headers array.
33-
34-
These fields are added during SEGY import to preserve raw header bytes,
35-
but they cause dtype mismatches during export. This function removes them.
36-
37-
Args:
38-
headers: Header array that may contain raw unspecified fields.
39-
40-
Returns:
41-
Header array with raw unspecified fields removed.
42-
"""
43-
if headers.dtype.names is None:
44-
return headers
45-
46-
# Find field names that don't start with __MDIO_RAW_UNSPECIFIED_
47-
field_names = [name for name in headers.dtype.names if not name.startswith("__MDIO_RAW_UNSPECIFIED_")]
48-
49-
if len(field_names) == len(headers.dtype.names):
50-
# No raw unspecified fields found, return as-is
51-
return headers
52-
53-
# Create new structured array with only the non-raw fields
54-
new_dtype = [(name, headers.dtype.fields[name][0]) for name in field_names]
55-
filtered_headers = np.empty(headers.shape, dtype=new_dtype)
56-
57-
for name in field_names:
58-
filtered_headers[name] = headers[name]
59-
60-
return filtered_headers
61-
62-
6331
def make_segy_factory(spec: SegySpec, binary_header: dict[str, int]) -> SegyFactory:
6432
"""Generate SEG-Y factory from MDIO metadata."""
6533
sample_interval = binary_header["sample_interval"]
@@ -199,9 +167,7 @@ def serialize_to_segy_stack( # noqa: PLR0913
199167
samples = samples[live_mask]
200168
headers = headers[live_mask]
201169

202-
# Filter out raw unspecified fields that cause dtype mismatches
203-
filtered_headers = _filter_raw_unspecified_fields(headers)
204-
buffer = segy_factory.create_traces(filtered_headers, samples)
170+
buffer = segy_factory.create_traces(headers, samples)
205171

206172
global_index = block_start[0]
207173
record_id_str = str(global_index)
@@ -233,9 +199,7 @@ def serialize_to_segy_stack( # noqa: PLR0913
233199
rec_samples = samples[rec_index][rec_live_mask]
234200
rec_headers = headers[rec_index][rec_live_mask]
235201

236-
# Filter out raw unspecified fields that cause dtype mismatches
237-
filtered_headers = _filter_raw_unspecified_fields(rec_headers)
238-
buffer = segy_factory.create_traces(filtered_headers, rec_samples)
202+
buffer = segy_factory.create_traces(rec_headers, rec_samples)
239203

240204
global_index = tuple(block_start[i] + rec_index[i] for i in range(record_ndim))
241205
record_id_str = "/".join(map(str, global_index))

0 commit comments

Comments
 (0)