|
7 | 7 | from typing import TYPE_CHECKING |
8 | 8 |
|
9 | 9 | import numpy as np |
| 10 | +import zarr |
10 | 11 | from segy import SegyFile |
11 | 12 | from segy.config import SegySettings |
12 | 13 | from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem |
13 | 14 | from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0 |
14 | 15 |
|
15 | 16 | from mdio.api.io import _normalize_path |
16 | 17 | from mdio.api.io import to_mdio |
| 18 | +from mdio.builder.schemas.chunk_grid import RegularChunkGrid |
| 19 | +from mdio.builder.schemas.chunk_grid import RegularChunkShape |
| 20 | +from mdio.builder.schemas.compressors import Blosc |
| 21 | +from mdio.builder.schemas.compressors import BloscCname |
| 22 | +from mdio.builder.schemas.dtype import ScalarType |
17 | 23 | from mdio.builder.schemas.v1.units import LengthUnitEnum |
18 | 24 | from mdio.builder.schemas.v1.units import LengthUnitModel |
| 25 | +from mdio.builder.schemas.v1.variable import VariableMetadata |
19 | 26 | from mdio.builder.xarray_builder import to_xarray_dataset |
| 27 | +from mdio.constants import ZarrFormat |
20 | 28 | from mdio.converters.exceptions import EnvironmentFormatError |
21 | 29 | from mdio.converters.exceptions import GridTraceCountError |
22 | 30 | from mdio.converters.exceptions import GridTraceSparsityError |
@@ -333,6 +341,61 @@ def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, A |
333 | 341 | dataset.metadata.attributes["gridOverrides"] = grid_overrides |
334 | 342 |
|
335 | 343 |
|
| 344 | +def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate: |
| 345 | + """Add raw headers capability to the MDIO template by monkey-patching its _add_variables method. |
| 346 | +
|
| 347 | + This function modifies the template's _add_variables method to also add a raw headers variable |
| 348 | + with the following characteristics: |
| 349 | + - Same rank as the Headers variable (all dimensions except vertical) |
| 350 | + - Name: "RawHeaders" |
| 351 | + - Type: ScalarType.HEADERS |
| 352 | + - No coordinates |
| 353 | + - zstd compressor |
| 354 | + - No additional metadata |
| 355 | + - Chunked the same as the Headers variable |
| 356 | +
|
| 357 | + Args: |
| 358 | + mdio_template: The MDIO template to mutate |
| 359 | + Returns: |
| 360 | + The mutated MDIO template |
| 361 | + """ |
| 362 | + # Check if raw headers enhancement has already been applied to avoid duplicate additions |
| 363 | + if hasattr(mdio_template, "_mdio_raw_headers_enhanced"): |
| 364 | + return mdio_template |
| 365 | + |
| 366 | + # Store the original _add_variables method |
| 367 | + original_add_variables = mdio_template._add_variables |
| 368 | + |
| 369 | + def enhanced_add_variables() -> None: |
| 370 | + # Call the original method first |
| 371 | + original_add_variables() |
| 372 | + |
| 373 | + # Now add the raw headers variable |
| 374 | + chunk_shape = mdio_template._var_chunk_shape[:-1] |
| 375 | + |
| 376 | + # Create chunk grid metadata |
| 377 | + chunk_metadata = RegularChunkGrid(configuration=RegularChunkShape(chunk_shape=chunk_shape)) |
| 378 | + |
| 379 | + # Add the raw headers variable using the builder's add_variable method |
| 380 | + mdio_template._builder.add_variable( |
| 381 | + name="raw_headers", |
| 382 | + long_name="Raw Headers", |
| 383 | + dimensions=mdio_template._dim_names[:-1], # All dimensions except vertical |
| 384 | + data_type=ScalarType.BYTES240, |
| 385 | + compressor=Blosc(cname=BloscCname.zstd), |
| 386 | + coordinates=None, # No coordinates as specified |
| 387 | + metadata=VariableMetadata(chunk_grid=chunk_metadata), |
| 388 | + ) |
| 389 | + |
| 390 | + # Replace the template's _add_variables method |
| 391 | + mdio_template._add_variables = enhanced_add_variables |
| 392 | + |
| 393 | + # Mark the template as enhanced to prevent duplicate monkey-patching |
| 394 | + mdio_template._mdio_raw_headers_enhanced = True |
| 395 | + |
| 396 | + return mdio_template |
| 397 | + |
| 398 | + |
336 | 399 | def segy_to_mdio( # noqa PLR0913 |
337 | 400 | segy_spec: SegySpec, |
338 | 401 | mdio_template: AbstractDatasetTemplate, |
@@ -372,6 +435,14 @@ def segy_to_mdio( # noqa PLR0913 |
372 | 435 |
|
373 | 436 | _, non_dim_coords = _get_coordinates(grid, segy_headers, mdio_template) |
374 | 437 | header_dtype = to_structured_type(segy_spec.trace.header.dtype) |
| 438 | + |
| 439 | + if os.getenv("MDIO__IMPORT__RAW_HEADERS") in ("1", "true", "yes", "on"): |
| 440 | + if zarr.config.get("default_zarr_format") == ZarrFormat.V2: |
| 441 | + logger.warning("Raw headers are only supported for Zarr v3. Skipping raw headers.") |
| 442 | + else: |
| 443 | + logger.warning("MDIO__IMPORT__RAW_HEADERS is experimental and expected to change or be removed.") |
| 444 | + mdio_template = _add_raw_headers_to_template(mdio_template) |
| 445 | + |
375 | 446 | horizontal_unit = _get_horizontal_coordinate_unit(segy_dimensions) |
376 | 447 | mdio_ds: Dataset = mdio_template.build_dataset( |
377 | 448 | name=mdio_template.name, |
|
0 commit comments