Skip to content

Commit 5c6c5f8

Browse files
authored
Refactor revision handling to match new segy format (#494)
* Refactor SEG-Y compatibility with improved revision handling * Add InvalidMDIOError exception class * Rename function argument to match signature. * Add error handling to docstring. * Update pre-commit hook stages due to deprecation * string compat for <py3.12 * Add unit tests for MDIO compatibility with older versions * Refactor MDIO version 0.7.4 metadata update logic.
1 parent 746f062 commit 5c6c5f8

File tree

5 files changed

+188
-10
lines changed

5 files changed

+188
-10
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ repos:
3333
entry: end-of-file-fixer
3434
language: system
3535
types: [text]
36-
stages: [commit, push, manual]
36+
stages: [pre-commit, pre-push, manual]
3737
- id: flake8
3838
name: flake8
3939
entry: flake8
@@ -60,7 +60,7 @@ repos:
6060
entry: trailing-whitespace-fixer
6161
language: system
6262
types: [text]
63-
stages: [commit, push, manual]
63+
stages: [pre-commit, pre-push, manual]
6464
args: [--markdown-linebreak-ext=md]
6565
- repo: https://github.com/pre-commit/mirrors-prettier
6666
rev: v2.6.0

src/mdio/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,7 @@ def __init__(self, message, name=None, expected=None):
5050
message = " - ".join([message, extras])
5151

5252
super().__init__(message)
53+
54+
55+
class InvalidMDIOError(MDIOError):
    """Error raised when an MDIO file is found to be invalid."""

src/mdio/segy/compat.py

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88

99
from __future__ import annotations
1010

11+
import logging
1112
import os
1213
from importlib import metadata
1314

15+
from packaging import version
1416
from segy.alias.segyio import SEGYIO_BIN_FIELD_MAP
1517
from segy.alias.segyio import SEGYIO_TRACE_FIELD_MAP
1618
from segy.schema import HeaderField
@@ -20,17 +22,33 @@
2022
from segy.schema import TextHeaderSpec
2123
from segy.schema import TraceDataSpec
2224
from segy.schema import TraceSpec
25+
from segy.standards.fields import binary
26+
27+
from mdio.exceptions import InvalidMDIOError
2328

2429

2530
MDIO_VERSION = metadata.version("multidimio")
2631

2732

33+
logger = logging.getLogger(__name__)
34+
35+
2836
def get_binary_fields() -> list[HeaderField]:
2937
"""Generate binary header fields from equinor/segyio fields."""
30-
return [field.model for field in SEGYIO_BIN_FIELD_MAP.values()]
38+
revision_field = binary.Rev1.SEGY_REVISION.model
39+
mdio_v0_bin_fields = []
40+
41+
# Replace min/max (rev2-ish) with rev1 like parsing.
42+
# Ignore minor one, and add the revision as 4-byte.
43+
for alias, field in SEGYIO_BIN_FIELD_MAP.items():
44+
if alias == "SEGYRevision":
45+
mdio_v0_bin_fields.append(revision_field)
46+
elif alias != "SEGYRevisionMinor":
47+
mdio_v0_bin_fields.append(field.model)
48+
return mdio_v0_bin_fields
3149

3250

33-
def get_trace_fields(version: str) -> list[HeaderField]:
51+
def get_trace_fields(version_str: str) -> list[HeaderField]:
3452
"""Generate trace header fields.
3553
3654
This part allows us to configure custom rules for different MDIO versions.
@@ -45,28 +63,29 @@ def get_trace_fields(version: str) -> list[HeaderField]:
4563
* mdio>=0.8.0 adds an extra field to the end to fill the last 8 bytes
4664
4765
Args:
48-
version: MDIO version to generate the trace fields for.
66+
version_str: MDIO version to generate the trace fields for.
4967
5068
Returns:
5169
List of header fields for specified MDIO version trace header encoding.
5270
"""
5371
trace_fields = [field.model for field in SEGYIO_TRACE_FIELD_MAP.values()]
54-
if version > "0.7.4":
72+
version_obj = version.parse(version_str)
73+
if version_obj > version.parse("0.7.4"):
5574
trace_fields.append(HeaderField(name="unassigned", byte=233, format="int64"))
5675
return trace_fields
5776

5877

59-
def mdio_segy_spec(version: str | None = None) -> SegySpec:
78+
def mdio_segy_spec(version_str: str | None = None) -> SegySpec:
6079
"""Get a SEG-Y encoding spec for MDIO based on version."""
6180
spec_override = os.getenv("MDIO__SEGY__SPEC")
6281

6382
if spec_override is not None:
6483
return SegySpec.model_validate_json(spec_override)
6584

66-
version = MDIO_VERSION if version is None else version
85+
version_str = MDIO_VERSION if version_str is None else version_str
6786

6887
binary_fields = get_binary_fields()
69-
trace_fields = get_trace_fields(version)
88+
trace_fields = get_trace_fields(version_str)
7089

7190
return SegySpec(
7291
segy_standard=None,
@@ -77,3 +96,43 @@ def mdio_segy_spec(version: str | None = None) -> SegySpec:
7796
data=TraceDataSpec(format=ScalarType.IBM32), # placeholder
7897
),
7998
)
99+
100+
101+
def revision_encode(binary_header: dict, version_str: str) -> dict:
    """Encode revision code to binary header.

    Legacy MDIO (<0.8) stores the revision under the keys "SEGYRevision" and
    "SEGYRevisionMinor", whereas newer files use "segy_revision_major" and
    "segy_revision_minor". In either case the two values are packed into a
    single Rev1-like code (major in the high byte, minor in the low byte)
    and stored under "segy_revision", ready to write to SEG-Y.

    The dictionary is updated in place: the major/minor keys are removed and
    "segy_revision" is added. If either key is missing, the dictionary is
    left untouched and an error is raised.

    Args:
        binary_header: Dictionary representing the SEG-Y binary header.
            Contains keys for major and minor revision numbers.
        version_str: MDIO version string to determine the encoding format.

    Returns:
        The same dictionary, updated with the encoded revision.

    Raises:
        InvalidMDIOError: Raised when binary header in MDIO is broken, i.e.
            one of the expected revision keys is missing.
    """
    version_obj = version.parse(version_str)
    if version_obj > version.parse("0.7.4"):
        major_key, minor_key = "segy_revision_major", "segy_revision_minor"
    else:  # MDIO <0.8
        major_key, minor_key = "SEGYRevision", "SEGYRevisionMinor"

    # Read both values before removing anything so that a missing key does
    # not leave the caller's dictionary half-modified.
    try:
        major = binary_header[major_key]
        minor = binary_header[minor_key]
    except KeyError as err:
        msg = "Missing revision keys from binary header."
        logger.error(msg)
        # Chain from the caught instance so the missing key name is kept.
        raise InvalidMDIOError(msg) from err

    del binary_header[major_key]
    del binary_header[minor_key]

    code = (major << 8) | minor
    code_hex = f"0x{code:04x}"
    binary_header["segy_revision"] = code
    logger.info(
        "Encoded revision %s.%s to code=%r ~ %s", major, minor, code, code_hex
    )
    return binary_header

src/mdio/segy/creation.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import logging
56
import os
67
from os import path
78
from shutil import copyfileobj
@@ -16,12 +17,16 @@
1617

1718
from mdio.api.accessor import MDIOReader
1819
from mdio.segy.compat import mdio_segy_spec
20+
from mdio.segy.compat import revision_encode
1921

2022

2123
if TYPE_CHECKING:
2224
from numpy.typing import NDArray
2325

2426

27+
logger = logging.getLogger(__name__)
28+
29+
2530
def make_segy_factory(
2631
mdio: MDIOReader,
2732
spec: SegySpec,
@@ -91,7 +96,9 @@ def mdio_spec_to_segy(
9196

9297
text_str = "\n".join(mdio.text_header)
9398
text_bytes = factory.create_textual_header(text_str)
94-
bin_hdr_bytes = factory.create_binary_header(mdio.binary_header)
99+
100+
binary_header = revision_encode(mdio.binary_header, mdio_file_version)
101+
bin_hdr_bytes = factory.create_binary_header(binary_header)
95102

96103
with open(output_segy_path, mode="wb") as fp:
97104
fp.write(text_bytes)

tests/unit/test_compat.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""Test MDIO compatibility with older versions."""
2+
3+
from pathlib import Path
4+
5+
import numpy as np
6+
import pytest
7+
import zarr
8+
from segy import SegyFile
9+
from segy.factory import SegyFactory
10+
from segy.standards import get_segy_standard
11+
12+
from mdio import mdio_to_segy
13+
from mdio import segy_to_mdio
14+
15+
16+
# Constants
17+
MDIO_VERSIONS = ["0.7.4", "0.8.3"]
18+
SEGY_REVISIONS = [0.0, 0.1, 1.0, 1.1]
19+
INLINES = (10, 10, 11, 11)
20+
CROSSLINES = (100, 101, 100, 101)
21+
INDEX_BYTES = (189, 193)
22+
API_VERSION_KEY = "api_version"
23+
BINARY_HEADER_KEY = "binary_header"
24+
CHUNKED_TRACE_HEADERS_KEY = "chunked_012_trace_headers"
25+
26+
27+
def update_mdio_for_version_0_7_4(root):
    """Rewrite MDIO metadata in place so it mimics a version 0.7.4 file.

    The binary header revision keys are renamed to their legacy names, the
    chunked trace headers are replaced by a zero-filled array whose dtype
    lacks the last field, and the store metadata is re-consolidated.

    Args:
        root: Zarr group of the MDIO file. It is modified, so it has to be
            opened in a mode that allows writing.
    """
    # Swap the current revision keys for the legacy (pre-0.8) names.
    legacy_bin_hdr = root.metadata.attrs[BINARY_HEADER_KEY]
    for current_key, legacy_key in (
        ("segy_revision_major", "SEGYRevision"),
        ("segy_revision_minor", "SEGYRevisionMinor"),
    ):
        legacy_bin_hdr[legacy_key] = legacy_bin_hdr.pop(current_key)
    root.metadata.attrs[BINARY_HEADER_KEY] = legacy_bin_hdr

    # Drop the final dtype field, i.e. trace headers past field 232, to
    # match the pre-0.8 schema; the replacement array is zero-filled.
    current_headers = root.metadata[CHUNKED_TRACE_HEADERS_KEY]
    legacy_dtype = np.dtype(current_headers.dtype.descr[:-1])
    legacy_headers = zarr.zeros_like(current_headers, dtype=legacy_dtype)
    root.metadata.create_dataset(
        CHUNKED_TRACE_HEADERS_KEY, data=legacy_headers, overwrite=True
    )
    zarr.consolidate_metadata(root.store)
45+
46+
47+
@pytest.mark.parametrize("mdio_version", MDIO_VERSIONS)
@pytest.mark.parametrize("segy_revision", SEGY_REVISIONS)
def test_revision_encode_decode(
    mdio_version: str, segy_revision: float, tmp_path: Path
) -> None:
    """Test binary header major/minor revision roundtrip.

    After introducing TGSAI/segy, we changed the header names. Now we use
    aliasing and MDIO has a dummy schema. The handling is slightly different
    for SEG-Y revision major/minor numbers. Testing to ensure they're
    (de)serialized correctly.

    Args:
        mdio_version: MDIO API version the intermediate file should mimic.
        segy_revision: SEG-Y revision written to the input file, given as
            ``major.minor`` with a single-digit minor.
        tmp_path: Pytest-provided temporary directory for all files.
    """
    rev1_spec = get_segy_standard(1.0)
    segy_filename = tmp_path / "segy_input.sgy"
    mdio_output_filename = tmp_path / "output.mdio"
    roundtrip_sgy_filename = tmp_path / "roundtrip_output.sgy"

    # Make a rev1 segy
    factory = SegyFactory(rev1_spec, sample_interval=1000, samples_per_trace=5)

    # We will replace the values in revision fields with these.
    # The fractional part from modf is inexact (e.g. 1.2 -> 0.19999...), so
    # round before converting; plain truncation could be off by one.
    minor, major = np.modf(segy_revision)
    major, minor = int(major), int(round(float(minor) * 10))
    revision_code = (major << 8) | minor

    # Make fake tiny 3D dataset
    txt_buffer = factory.create_textual_header()

    header = factory.create_trace_header_template(len(INLINES))
    data = factory.create_trace_sample_template(len(INLINES))
    header["inline"] = INLINES
    header["crossline"] = CROSSLINES
    data[:] = np.arange(len(INLINES))[:, None]
    trace_buffer = factory.create_traces(header, data)

    # Update revision during bin hdr creation
    bin_hdr_buffer = factory.create_binary_header(
        update={"segy_revision": revision_code}
    )
    with open(segy_filename, mode="wb") as fp:
        fp.write(txt_buffer)
        fp.write(bin_hdr_buffer)
        fp.write(trace_buffer)

    # Convert SEG-Y to MDIO
    segy_to_mdio(str(segy_filename), str(mdio_output_filename), index_bytes=INDEX_BYTES)

    # Modify MDIO for specific versions
    root = zarr.open_group(mdio_output_filename, mode="r+")
    root.attrs[API_VERSION_KEY] = mdio_version
    if mdio_version == "0.7.4":
        update_mdio_for_version_0_7_4(root)

    # Convert MDIO back to SEG-Y
    mdio_to_segy(str(mdio_output_filename), str(roundtrip_sgy_filename))

    # Assert binary headers and revisions match
    orig = SegyFile(segy_filename, spec=rev1_spec)
    rt = SegyFile(roundtrip_sgy_filename, spec=rev1_spec)
    assert orig.binary_header["segy_revision_major"] == major
    assert orig.binary_header["segy_revision_minor"] == minor
    assert orig.binary_header == rt.binary_header

0 commit comments

Comments
 (0)