Skip to content

Commit 1e91fab

Browse files
committed
Reimplement disaster recovery logic
1 parent 954661c commit 1e91fab

File tree

5 files changed

+78
-1
lines changed

5 files changed

+78
-1
lines changed

src/mdio/builder/schemas/dtype.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class ScalarType(StrEnum):
3232
COMPLEX64 = "complex64"
3333
COMPLEX128 = "complex128"
3434
COMPLEX256 = "complex256"
35+
HEADERS_V3 = "r1920" # Raw number of BITS, must be a multiple of 8
3536

3637

3738
class StructuredField(CamelCaseStrictModel):

src/mdio/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,5 @@ class ZarrFormat(IntEnum):
6464
ScalarType.COMPLEX64: complex(np.nan, np.nan),
6565
ScalarType.COMPLEX128: complex(np.nan, np.nan),
6666
ScalarType.COMPLEX256: complex(np.nan, np.nan),
67+
ScalarType.HEADERS_V3: b"\x00" * 240,
6768
}

src/mdio/converters/segy.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,11 @@
2222
from mdio.converters.exceptions import GridTraceSparsityError
2323
from mdio.converters.type_converter import to_structured_type
2424
from mdio.core.grid import Grid
25+
from mdio.builder.schemas.chunk_grid import RegularChunkGrid
26+
from mdio.builder.schemas.chunk_grid import RegularChunkShape
27+
from mdio.builder.schemas.compressors import Blosc
28+
from mdio.builder.schemas.compressors import BloscCname
29+
from mdio.builder.schemas.dtype import ScalarType
2530
from mdio.segy import blocked_io
2631
from mdio.segy.utilities import get_grid_plan
2732

@@ -330,6 +335,58 @@ def _add_segy_ingest_attributes(dataset: Dataset, segy_file: SegyFile, grid_over
330335
dataset.metadata.attributes.update(segy_attributes)
331336

332337

338+
def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
    """Patch an MDIO template so that building it also declares a raw-headers variable.

    The template's ``_add_variables`` attribute is replaced, on this instance only, by a
    wrapper that first runs the original method and then registers one extra variable
    through ``mdio_template._builder.add_variable`` with these properties:

    - Name ``"raw_headers"`` and long name ``"Raw Headers"``.
    - Dimensions ``mdio_template._dim_names[:-1]``, i.e. every dimension except the
      last one (the vertical axis).
    - Data type ``ScalarType.HEADERS_V3``.
    - ``Blosc`` compressor configured with the ``zstd`` codec.
    - No coordinates.
    - Regular chunk grid shaped ``mdio_template._var_chunk_shape[:-1]``; no other metadata.

    The call is idempotent: once a template carries the ``_mdio_raw_headers_enhanced``
    marker attribute it is returned untouched, so the variable is never added twice.

    Args:
        mdio_template: The MDIO template to mutate in place.

    Returns:
        The same ``mdio_template`` object that was passed in, now patched (or left
        as-is if it had already been patched).
    """
    # A previous call already wrapped this instance; wrapping again would make the
    # build step try to register "raw_headers" a second time.
    if hasattr(mdio_template, "_mdio_raw_headers_enhanced"):
        return mdio_template

    # Capture the current method so the wrapper can delegate to it.
    original_add_variables = mdio_template._add_variables

    def enhanced_add_variables() -> None:
        """Run the template's own variable setup, then append the raw-headers variable."""
        original_add_variables()

        # Template attributes are read here, at build time, not when the patch is
        # installed, so the wrapper sees whatever state the template has by then.
        chunk_shape = mdio_template._var_chunk_shape[:-1]
        chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))

        # NOTE(review): kept as a function-local import exactly as in the original
        # change -- presumably to avoid an import cycle; confirm before hoisting.
        from mdio.builder.schemas.v1.variable import VariableMetadata

        mdio_template._builder.add_variable(
            name="raw_headers",
            long_name="Raw Headers",
            dimensions=mdio_template._dim_names[:-1],  # All dimensions except vertical
            data_type=ScalarType.HEADERS_V3,
            compressor=Blosc(cname=BloscCname.zstd),
            coordinates=None,  # Raw headers carry no coordinates
            metadata=VariableMetadata(chunk_grid=chunk_metadata),
        )

    # Instance-level override: only this template object is affected, not its class.
    mdio_template._add_variables = enhanced_add_variables

    # Marker consulted by the guard at the top of this function.
    mdio_template._mdio_raw_headers_enhanced = True

    return mdio_template
388+
389+
333390
def segy_to_mdio( # noqa PLR0913
334391
segy_spec: SegySpec,
335392
mdio_template: AbstractDatasetTemplate,
@@ -369,6 +426,11 @@ def segy_to_mdio( # noqa PLR0913
369426

370427
_, non_dim_coords = _get_coordinates(grid, segy_headers, mdio_template)
371428
header_dtype = to_structured_type(segy_spec.trace.header.dtype)
429+
430+
if os.getenv("MDIO__DO_RAW_HEADERS") == "1":
431+
logger.warning("MDIO__DO_RAW_HEADERS is experimental and expected to change or be removed.")
432+
mdio_template = _add_raw_headers_to_template(mdio_template)
433+
372434
horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions)
373435
mdio_ds: Dataset = mdio_template.build_dataset(
374436
name=mdio_template.name,

src/mdio/converters/type_converter.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ def to_structured_type(data_type: np_dtype) -> StructuredType:
7878
def to_numpy_dtype(data_type: ScalarType | StructuredType) -> np_dtype:
7979
"""Get the numpy dtype for a variable."""
8080
if isinstance(data_type, ScalarType):
81+
if data_type == ScalarType.HEADERS_V3:
82+
return np_dtype("|V240")
8183
return np_dtype(data_type.value)
8284
if isinstance(data_type, StructuredType):
8385
return np_dtype([(f.name, f.format.value) for f in data_type.fields])

src/mdio/segy/_workers.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -122,12 +122,15 @@ def trace_worker( # noqa: PLR0913
122122
traces = segy_file.trace[live_trace_indexes]
123123

124124
header_key = "headers"
125+
raw_header_key = "raw_headers"
125126

126127
# Get subset of the dataset that has not yet been saved
127128
# The headers might not be present in the dataset
128129
worker_variables = [data_variable_name]
129130
if header_key in dataset.data_vars: # Keeping the `if` here to allow for more worker configurations
130131
worker_variables.append(header_key)
132+
if raw_header_key in dataset.data_vars:
133+
worker_variables.append(raw_header_key)
131134

132135
ds_to_write = dataset[worker_variables]
133136

@@ -146,7 +149,15 @@ def trace_worker( # noqa: PLR0913
146149
attrs=ds_to_write[header_key].attrs,
147150
encoding=ds_to_write[header_key].encoding, # Not strictly necessary, but safer than not doing it.
148151
)
149-
152+
if raw_header_key in worker_variables:
153+
tmp_raw_headers = np.zeros_like(dataset[raw_header_key])
154+
tmp_raw_headers[not_null] = traces.header.view("|V240") # TODO: Ensure this is using the RAW view and not an interpreted view.
155+
ds_to_write[raw_header_key] = Variable(
156+
ds_to_write[raw_header_key].dims,
157+
tmp_raw_headers,
158+
attrs=ds_to_write[raw_header_key].attrs,
159+
encoding=ds_to_write[raw_header_key].encoding, # Not strictly necessary, but safer than not doing it.
160+
)
150161
data_variable = ds_to_write[data_variable_name]
151162
fill_value = _get_fill_value(ScalarType(data_variable.dtype.name))
152163
tmp_samples = np.full_like(data_variable, fill_value=fill_value)

0 commit comments

Comments
 (0)