|
22 | 22 | from mdio.converters.exceptions import GridTraceSparsityError |
23 | 23 | from mdio.converters.type_converter import to_structured_type |
24 | 24 | from mdio.core.grid import Grid |
| 25 | +from mdio.builder.schemas.chunk_grid import RegularChunkGrid |
| 26 | +from mdio.builder.schemas.chunk_grid import RegularChunkShape |
| 27 | +from mdio.builder.schemas.compressors import Blosc |
| 28 | +from mdio.builder.schemas.compressors import BloscCname |
| 29 | +from mdio.builder.schemas.dtype import ScalarType |
25 | 30 | from mdio.segy import blocked_io |
26 | 31 | from mdio.segy.utilities import get_grid_plan |
27 | 32 |
|
@@ -330,6 +335,58 @@ def _add_segy_ingest_attributes(dataset: Dataset, segy_file: SegyFile, grid_over |
330 | 335 | dataset.metadata.attributes.update(segy_attributes) |
331 | 336 |
|
332 | 337 |
|
def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
    """Monkey-patch a template so that building it also registers a raw headers variable.

    The template instance's ``_add_variables`` method is replaced with a wrapper that
    first calls the original method and then adds one extra variable through
    ``mdio_template._builder.add_variable`` with:

    - name ``"raw_headers"`` and long name ``"Raw Headers"``
    - dimensions ``mdio_template._dim_names[:-1]`` (every dimension except the last,
      which the original author describes as the vertical one)
    - data type ``ScalarType.HEADERS_V3``
    - no coordinates
    - a Blosc compressor configured with the zstd codec
    - chunk grid metadata built from ``mdio_template._var_chunk_shape[:-1]`` and no
      other metadata

    The patch is applied at most once per template instance: a private marker
    attribute is set after patching and checked on entry.

    Args:
        mdio_template: The MDIO template to mutate in place.

    Returns:
        The same template instance that was passed in, patched if it was not already.
    """
    # A truthy marker means the wrapper is already installed; wrapping again would
    # register the raw headers variable more than once per build.
    if getattr(mdio_template, "_mdio_raw_headers_enhanced", False):
        return mdio_template

    # Keep a handle on the method being replaced so the wrapper can delegate to it.
    original_add_variables = mdio_template._add_variables

    def enhanced_add_variables() -> None:
        """Run the template's own variable setup, then append the raw headers variable."""
        # Kept function-local as in the original code.
        # NOTE(review): presumably deferred to avoid an import cycle -- confirm.
        from mdio.builder.schemas.v1.variable import VariableMetadata

        original_add_variables()

        # Drop the last entry of both the chunk shape and the dimension names so the
        # new variable spans every axis but the last one.
        chunk_shape = mdio_template._var_chunk_shape[:-1]
        chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape))

        mdio_template._builder.add_variable(
            name="raw_headers",
            long_name="Raw Headers",
            dimensions=mdio_template._dim_names[:-1],
            data_type=ScalarType.HEADERS_V3,
            compressor=Blosc(cname=BloscCname.zstd),
            coordinates=None,
            metadata=VariableMetadata(chunk_grid=chunk_metadata),
        )

    # Install the wrapper on the instance, then record that it is in place.
    mdio_template._add_variables = enhanced_add_variables
    mdio_template._mdio_raw_headers_enhanced = True

    return mdio_template
| 388 | + |
| 389 | + |
333 | 390 | def segy_to_mdio( # noqa PLR0913 |
334 | 391 | segy_spec: SegySpec, |
335 | 392 | mdio_template: AbstractDatasetTemplate, |
@@ -369,6 +426,11 @@ def segy_to_mdio( # noqa PLR0913 |
369 | 426 |
|
370 | 427 | _, non_dim_coords = _get_coordinates(grid, segy_headers, mdio_template) |
371 | 428 | header_dtype = to_structured_type(segy_spec.trace.header.dtype) |
| 429 | + |
| 430 | + if os.getenv("MDIO__DO_RAW_HEADERS") == "1": |
| 431 | + logger.warning("MDIO__DO_RAW_HEADERS is experimental and expected to change or be removed.") |
| 432 | + mdio_template = _add_raw_headers_to_template(mdio_template) |
| 433 | + |
372 | 434 | horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions) |
373 | 435 | mdio_ds: Dataset = mdio_template.build_dataset( |
374 | 436 | name=mdio_template.name, |
|
0 commit comments