Skip to content

Commit 10d04ee

Browse files
committed
Begin implementation of header disaster recovery array
1 parent faeb616 commit 10d04ee

File tree

5 files changed

+74
-0
lines changed

5 files changed

+74
-0
lines changed

src/mdio/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,5 @@
5656
ScalarType.COMPLEX64: complex(np_nan, np_nan),
5757
ScalarType.COMPLEX128: complex(np_nan, np_nan),
5858
ScalarType.COMPLEX256: complex(np_nan, np_nan),
59+
ScalarType.HEADERS: b"\x00" * 240,
5960
}

src/mdio/converters/segy.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
from mdio.schemas.v1.units import AllUnits
2323
from mdio.schemas.v1.units import LengthUnitEnum
2424
from mdio.schemas.v1.units import LengthUnitModel
25+
from mdio.schemas.dtype import ScalarType
26+
from mdio.schemas.v1.variable import Variable
27+
from mdio.schemas.compressors import Blosc, BloscCname
28+
from mdio.schemas.metadata import ChunkGridMetadata
29+
from mdio.schemas.chunk_grid import RegularChunkGrid, RegularChunkShape
2530
from mdio.segy import blocked_io
2631
from mdio.segy.utilities import get_grid_plan
2732

@@ -312,6 +317,56 @@ def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None:
312317
)
313318

314319

320+
def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
    """Add a raw headers variable to an MDIO template by wrapping its ``_add_variables`` method.

    The template instance is mutated in place: its ``_add_variables`` attribute is replaced
    by a wrapper that first runs the original method and then registers one extra variable
    on the template's builder with the following characteristics:

    - Name: ``"raw_headers"`` (long name ``"Raw Headers"``)
    - Dimensions: every template dimension except the last one (the vertical axis)
    - Type: ``ScalarType.HEADERS``
    - No coordinates
    - Blosc compressor using the zstd codec
    - Chunk shape: the template's variable chunk shape with the last axis dropped
    - No metadata other than the chunk grid

    Calling this function more than once on the same template is a no-op after the first
    call, so the variable is never registered twice on a single build.

    Args:
        mdio_template: The MDIO template to mutate.

    Returns:
        The same template instance that was passed in, after patching.
    """
    # Guard against double patching: wrapping an already wrapped method would make
    # a single build attempt to add "raw_headers" twice.
    if getattr(mdio_template, "_mdio_raw_headers_enhanced", False):
        return mdio_template

    # Keep a handle on the original bound method so the wrapper can delegate to it.
    original_add_variables = mdio_template._add_variables

    def enhanced_add_variables() -> None:
        """Run the template's own variable setup, then append the raw headers variable."""
        original_add_variables()

        # Template attributes are read at call time (not at patch time) so the wrapper
        # sees whatever state the template has when the dataset is actually built.
        chunk_shape = mdio_template._var_chunk_shape[:-1]
        chunk_metadata = ChunkGridMetadata(
            chunk_grid=RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))
        )

        mdio_template._builder.add_variable(
            name="raw_headers",
            long_name="Raw Headers",
            dimensions=mdio_template._dim_names[:-1],  # All dimensions except vertical
            data_type=ScalarType.HEADERS,
            compressor=Blosc(cname=BloscCname.zstd),
            coordinates=None,  # Raw headers intentionally carry no coordinates
            metadata_info=[chunk_metadata],
        )

    # Shadow the method on this instance only; other templates are unaffected.
    mdio_template._add_variables = enhanced_add_variables

    # Remember that this instance is patched so later calls can return early.
    mdio_template._mdio_raw_headers_enhanced = True

    return mdio_template
368+
369+
315370
def segy_to_mdio(
316371
segy_spec: SegySpec,
317372
mdio_template: AbstractDatasetTemplate,
@@ -350,6 +405,9 @@ def segy_to_mdio(
350405
# https://github.com/TGSAI/mdio-python/issues/601
351406
headers = to_structured_type(segy_spec.trace.header.dtype)
352407

408+
if os.getenv("MDIO__DO_RAW_HEADERS") == "1":
409+
mdio_template = _add_raw_headers_to_template(mdio_template)
410+
353411
horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions)
354412
mdio_ds: Dataset = mdio_template.build_dataset(
355413
name=mdio_template.name,

src/mdio/converters/type_converter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def to_structured_type(data_type: np_dtype) -> StructuredType:
7878
def to_numpy_dtype(data_type: ScalarType | StructuredType) -> np_dtype:
7979
"""Get the numpy dtype for a variable."""
8080
if isinstance(data_type, ScalarType):
81+
if data_type == ScalarType.HEADERS:
82+
return np_dtype("|V240")
8183
return np_dtype(data_type.value)
8284
if isinstance(data_type, StructuredType):
8385
return np_dtype([(f.name, f.format.value) for f in data_type.fields])

src/mdio/schemas/dtype.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class ScalarType(StrEnum):
3232
COMPLEX64 = "complex64"
3333
COMPLEX128 = "complex128"
3434
COMPLEX256 = "complex256"
35+
HEADERS = "r1920" # Raw number of BITS, must be a multiple of 8
3536

3637

3738
class StructuredField(CamelCaseStrictModel):

src/mdio/segy/_workers.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,15 @@ def trace_worker( # noqa: PLR0913
122122
traces = segy_file.trace[live_trace_indexes]
123123

124124
header_key = "headers"
125+
raw_header_key = "raw_headers"
125126

126127
# Get subset of the dataset that has not yet been saved
127128
# The headers might not be present in the dataset
128129
worker_variables = [data_variable_name]
129130
if header_key in dataset.data_vars: # Keeping the `if` here to allow for more worker configurations
130131
worker_variables.append(header_key)
132+
if raw_header_key in dataset.data_vars:
133+
worker_variables.append(raw_header_key)
131134

132135
ds_to_write = dataset[worker_variables]
133136

@@ -146,6 +149,15 @@ def trace_worker( # noqa: PLR0913
146149
attrs=ds_to_write[header_key].attrs,
147150
encoding=ds_to_write[header_key].encoding, # Not strictly necessary, but safer than not doing it.
148151
)
152+
if raw_header_key in worker_variables:
153+
tmp_raw_headers = np.zeros_like(dataset[raw_header_key])
154+
tmp_raw_headers[not_null] = traces.header.view("|V240")
155+
ds_to_write[raw_header_key] = Variable(
156+
ds_to_write[raw_header_key].dims,
157+
tmp_raw_headers,
158+
attrs=ds_to_write[raw_header_key].attrs,
159+
encoding=ds_to_write[raw_header_key].encoding, # Not strictly necessary, but safer than not doing it.
160+
)
149161

150162
data_variable = ds_to_write[data_variable_name]
151163
fill_value = _get_fill_value(ScalarType(data_variable.dtype.name))

0 commit comments

Comments
 (0)