|
22 | 22 | from mdio.schemas.v1.units import AllUnits |
23 | 23 | from mdio.schemas.v1.units import LengthUnitEnum |
24 | 24 | from mdio.schemas.v1.units import LengthUnitModel |
| 25 | +from mdio.schemas.dtype import ScalarType |
| 26 | +from mdio.schemas.v1.variable import Variable |
| 27 | +from mdio.schemas.compressors import Blosc, BloscCname |
| 28 | +from mdio.schemas.metadata import ChunkGridMetadata |
| 29 | +from mdio.schemas.chunk_grid import RegularChunkGrid, RegularChunkShape |
25 | 30 | from mdio.segy import blocked_io |
26 | 31 | from mdio.segy.utilities import get_grid_plan |
27 | 32 |
|
@@ -312,6 +317,56 @@ def _add_text_binary_headers(dataset: Dataset, segy_file: SegyFile) -> None: |
312 | 317 | ) |
313 | 318 |
|
314 | 319 |
|
def _add_raw_headers_to_template(
    mdio_template: AbstractDatasetTemplate,
) -> AbstractDatasetTemplate:
    """Add a raw headers variable to an MDIO template by wrapping its ``_add_variables``.

    The template instance is mutated in place: its ``_add_variables`` attribute is replaced
    with a wrapper that first calls the original method and then registers one extra
    variable on the template's builder with these characteristics:

    - Name: "raw_headers" (long name "Raw Headers")
    - Dimensions: every entry of the template's ``_dim_names`` except the last (vertical) one
    - Type: ScalarType.HEADERS
    - No coordinates
    - Blosc compressor using the zstd codec
    - Chunk grid metadata only, built from ``_var_chunk_shape`` without its last axis

    Patching is idempotent: calling this function again on a template that was already
    patched leaves it unchanged, so the variable is not registered more than once per
    ``_add_variables`` invocation.

    Args:
        mdio_template: The MDIO template to mutate.

    Returns:
        The same template instance that was passed in.
    """
    # Keep a handle on the current method so the wrapper can delegate to it.
    original_add_variables = mdio_template._add_variables

    # A previous call already installed the wrapper; wrapping it again would make
    # every build register "raw_headers" twice on the builder.
    if getattr(original_add_variables, "_adds_raw_headers", False):
        return mdio_template

    def enhanced_add_variables() -> None:
        """Run the template's own variable setup, then append the raw headers variable."""
        original_add_variables()

        # Headers have no vertical axis, so drop the last entry of the chunk shape.
        chunk_shape = mdio_template._var_chunk_shape[:-1]

        chunk_metadata = ChunkGridMetadata(
            chunk_grid=RegularChunkGrid(
                configuration=RegularChunkShape(chunk_shape=chunk_shape)
            )
        )

        mdio_template._builder.add_variable(
            name="raw_headers",
            long_name="Raw Headers",
            dimensions=mdio_template._dim_names[:-1],  # All dimensions except vertical
            data_type=ScalarType.HEADERS,
            compressor=Blosc(cname=BloscCname.zstd),
            coordinates=None,  # Raw headers carry no coordinates
            metadata_info=[chunk_metadata],
        )

    # Marker read by the idempotency guard above on subsequent calls.
    enhanced_add_variables._adds_raw_headers = True  # type: ignore[attr-defined]

    # Shadow the class-level method on this instance only.
    mdio_template._add_variables = enhanced_add_variables

    return mdio_template
| 368 | + |
| 369 | + |
315 | 370 | def segy_to_mdio( |
316 | 371 | segy_spec: SegySpec, |
317 | 372 | mdio_template: AbstractDatasetTemplate, |
@@ -350,6 +405,9 @@ def segy_to_mdio( |
350 | 405 | # https://github.com/TGSAI/mdio-python/issues/601 |
351 | 406 | headers = to_structured_type(segy_spec.trace.header.dtype) |
352 | 407 |
|
| 408 | + if os.getenv("MDIO__DO_RAW_HEADERS") == "1": |
| 409 | + mdio_template = _add_raw_headers_to_template(mdio_template) |
| 410 | + |
353 | 411 | horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions) |
354 | 412 | mdio_ds: Dataset = mdio_template.build_dataset( |
355 | 413 | name=mdio_template.name, |
|
0 commit comments