Skip to content

Commit b90f57e

Browse files
tasansal and Altay Sansal authored
Update SEGY library to >=0.5 (TGSAI#670)
* fix incorrect overlapping fields
* update segy >= 0.5
* update tests with new customize
* modify `segy_revision` handling when spec has None
* update quickstart with new customize and fix documentation errors

---------

Co-authored-by: Altay Sansal <[email protected]>
1 parent 517ccd0 commit b90f57e

File tree

9 files changed

+1642
-657
lines changed

9 files changed

+1642
-657
lines changed

docs/tutorials/quickstart.ipynb

Lines changed: 1344 additions & 314 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies = [
2525
"psutil>=7.0.0",
2626
"pydantic>=2.11.9",
2727
"rich>=14.1.0",
28-
"segy>=0.4.2",
28+
"segy>=0.5.0",
2929
"tqdm>=4.67.1",
3030
"universal-pathlib>=0.2.6",
3131
"xarray>=2025.9.0",

src/mdio/segy/creation.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import numpy as np
1212
from segy.factory import SegyFactory
13-
from segy.schema import SegyStandard
13+
from segy.standards.fields import binary
1414
from tqdm.auto import tqdm
1515

1616
from mdio.api.io import open_mdio
@@ -84,9 +84,12 @@ def mdio_spec_to_segy(
8484

8585
text_header_bytes = factory.create_textual_header(text_header)
8686

87-
# Remove segy_revision from binary header if target SEG-Y is Rev0
88-
if segy_spec.segy_standard is SegyStandard.REV0 and "segy_revision" in binary_header:
89-
binary_header.pop("segy_revision")
87+
# During MDIO SEGY import, TGSAI/segy always creates revision major/minor fields
88+
# We may not have it in the user desired spec. In that case we add it here
89+
if "segy_revision" not in segy_spec.binary_header.names:
90+
rev_field = binary.Rev1.SEGY_REVISION.model
91+
segy_spec.binary_header.customize(fields=rev_field)
92+
9093
binary_header_bytes = factory.create_binary_header(binary_header)
9194

9295
with output_path.open(mode="wb") as fp:

tests/integration/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ def get_segy_mock_4d_spec() -> SegySpec:
2626
HeaderField(name="channel", byte=13, format="int32"),
2727
HeaderField(name="shot_point", byte=17, format="int32"),
2828
HeaderField(name="offset", byte=37, format="int32"),
29-
HeaderField(name="samples_per_trace", byte=115, format="int32"),
30-
HeaderField(name="sample_interval", byte=117, format="int32"),
29+
HeaderField(name="samples_per_trace", byte=115, format="int16"),
30+
HeaderField(name="sample_interval", byte=117, format="int16"),
3131
HeaderField(name="shot_line", byte=133, format="int16"),
3232
HeaderField(name="cable", byte=137, format="int16"),
3333
HeaderField(name="gun", byte=171, format="int16"),

tests/integration/test_segy_import_export.py renamed to tests/integration/test_import_streamer_grid_overrides.py

Lines changed: 1 addition & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from __future__ import annotations
44

5-
import json
65
import os
76
from typing import TYPE_CHECKING
87

@@ -11,17 +10,8 @@
1110
import numpy.testing as npt
1211
import pytest
1312
import xarray.testing as xrt
14-
from segy import SegyFile
1513
from tests.integration.conftest import get_segy_mock_4d_spec
16-
from tests.integration.testing_data import binary_header_teapot_dome
17-
from tests.integration.testing_data import custom_teapot_dome_segy_spec
18-
from tests.integration.testing_data import text_header_teapot_dome
19-
from tests.integration.testing_helpers import get_inline_header_values
20-
from tests.integration.testing_helpers import get_values
21-
from tests.integration.testing_helpers import validate_variable
22-
23-
from mdio import __version__
24-
from mdio import mdio_to_segy
14+
2515
from mdio.api.io import open_mdio
2616
from mdio.builder.template_registry import TemplateRegistry
2717
from mdio.converters.exceptions import GridTraceSparsityError
@@ -34,7 +24,6 @@
3424

3525
from segy.schema import SegySpec
3626

37-
3827
dask.config.set(scheduler="synchronous")
3928
os.environ["MDIO__IMPORT__SAVE_SEGY_FILE_HEADER"] = "true"
4029

@@ -220,169 +209,3 @@ def test_import_6d_segy( # noqa: PLR0913
220209

221210
times_expected = list(range(0, num_samples, 1))
222211
xrt.assert_duckarray_equal(ds["time"], times_expected)
223-
224-
225-
@pytest.mark.dependency
226-
def test_3d_import(segy_input: Path, zarr_tmp: Path) -> None:
227-
"""Test importing a SEG-Y file to MDIO.
228-
229-
NOTE: This test must be executed before the 'TestReader' and 'TestExport' tests.
230-
"""
231-
segy_to_mdio(
232-
segy_spec=custom_teapot_dome_segy_spec(keep_unaltered=True),
233-
mdio_template=TemplateRegistry().get("PostStack3DTime"),
234-
input_path=segy_input,
235-
output_path=zarr_tmp,
236-
overwrite=True,
237-
)
238-
239-
240-
@pytest.mark.dependency("test_3d_import")
241-
class TestReader:
242-
"""Test reader functionality.
243-
244-
NOTE: These tests must be executed after the 'test_3d_import' and before running 'TestExport' tests.
245-
"""
246-
247-
def test_dataset_metadata(self, zarr_tmp: Path) -> None:
248-
"""Metadata reading tests."""
249-
ds = open_mdio(zarr_tmp)
250-
expected_attrs = {
251-
"apiVersion": __version__,
252-
"createdOn": "2025-08-06 16:21:54.747880+00:00",
253-
"name": "PostStack3DTime",
254-
}
255-
actual_attrs_json = ds.attrs
256-
# compare one by one due to ever changing createdOn. For it, we only check existence
257-
for key, value in expected_attrs.items():
258-
assert key in actual_attrs_json
259-
if key == "createdOn":
260-
assert actual_attrs_json[key] is not None
261-
else:
262-
assert actual_attrs_json[key] == value
263-
264-
attributes = ds.attrs["attributes"]
265-
assert attributes is not None
266-
assert len(attributes) == 3
267-
# Validate all attributes provided by the abstract template
268-
assert attributes["defaultVariableName"] == "amplitude"
269-
assert attributes["surveyType"] == "3D"
270-
assert attributes["gatherType"] == "stacked"
271-
272-
segy_file_header = ds["segy_file_header"]
273-
assert segy_file_header.attrs["textHeader"] == text_header_teapot_dome()
274-
assert segy_file_header.attrs["binaryHeader"] == binary_header_teapot_dome()
275-
276-
def test_variable_metadata(self, zarr_tmp: Path) -> None:
277-
"""Metadata reading tests."""
278-
ds = open_mdio(zarr_tmp)
279-
expected_attrs = {
280-
"count": 97354860,
281-
"sum": -8594.551666259766,
282-
"sumSquares": 40571291.6875,
283-
"min": -8.375323295593262,
284-
"max": 7.723702430725098,
285-
"histogram": {"counts": [], "binCenters": []},
286-
}
287-
actual_attrs_json = json.loads(ds["amplitude"].attrs["statsV1"])
288-
assert actual_attrs_json == expected_attrs
289-
290-
def test_grid(self, zarr_tmp: Path) -> None:
291-
"""Test validating MDIO variables."""
292-
ds = open_mdio(zarr_tmp)
293-
294-
# Validate the dimension coordinate variables
295-
validate_variable(ds, "inline", (345,), ("inline",), np.int32, range(1, 346), get_values)
296-
validate_variable(ds, "crossline", (188,), ("crossline",), np.int32, range(1, 189), get_values)
297-
validate_variable(ds, "time", (1501,), ("time",), np.int32, range(0, 3002, 2), get_values)
298-
299-
# Validate the non-dimensional coordinate variables
300-
validate_variable(ds, "cdp_x", (345, 188), ("inline", "crossline"), np.float64, None, None)
301-
validate_variable(ds, "cdp_y", (345, 188), ("inline", "crossline"), np.float64, None, None)
302-
303-
# Validate the headers
304-
# We have a custom set of headers since we used customize_segy_specs()
305-
segy_spec = custom_teapot_dome_segy_spec(keep_unaltered=True)
306-
data_type = segy_spec.trace.header.dtype
307-
308-
validate_variable(
309-
ds,
310-
"headers",
311-
(345, 188),
312-
("inline", "crossline"),
313-
data_type.newbyteorder("native"), # mdio saves with machine endian, spec could be different endian
314-
range(1, 346),
315-
get_inline_header_values,
316-
)
317-
318-
# Validate the trace mask
319-
validate_variable(ds, "trace_mask", (345, 188), ("inline", "crossline"), np.bool, None, None)
320-
321-
# validate the amplitude data
322-
validate_variable(
323-
ds,
324-
"amplitude",
325-
(345, 188, 1501),
326-
("inline", "crossline", "time"),
327-
np.float32,
328-
None,
329-
None,
330-
)
331-
332-
def test_inline_reads(self, zarr_tmp: Path) -> None:
333-
"""Read and compare every 75 inlines' mean and std. dev."""
334-
ds = open_mdio(zarr_tmp)
335-
inlines = ds["amplitude"][::75, :, :]
336-
mean, std = inlines.mean(), inlines.std()
337-
npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01])
338-
339-
def test_crossline_reads(self, zarr_tmp: Path) -> None:
340-
"""Read and compare every 75 crosslines' mean and std. dev."""
341-
ds = open_mdio(zarr_tmp)
342-
xlines = ds["amplitude"][:, ::75, :]
343-
mean, std = xlines.mean(), xlines.std()
344-
345-
npt.assert_allclose([mean, std], [-5.0329847e-05, 5.9406823e-01])
346-
347-
def test_zslice_reads(self, zarr_tmp: Path) -> None:
348-
"""Read and compare every 225 z-slices' mean and std. dev."""
349-
ds = open_mdio(zarr_tmp)
350-
slices = ds["amplitude"][:, :, ::225]
351-
mean, std = slices.mean(), slices.std()
352-
npt.assert_allclose([mean, std], [0.005236923, 0.61279935])
353-
354-
355-
@pytest.mark.dependency("test_3d_import")
356-
class TestExport:
357-
"""Test SEG-Y exporting functionality.
358-
359-
NOTE: This test(s) must be executed after the 'test_3d_import' and 'TestReader' tests successfully complete.
360-
"""
361-
362-
def test_3d_export(self, segy_input: Path, zarr_tmp: Path, segy_export_tmp: Path) -> None:
363-
"""Test 3D export to IBM and IEEE."""
364-
rng = np.random.default_rng(seed=1234)
365-
366-
spec = custom_teapot_dome_segy_spec(keep_unaltered=True)
367-
368-
mdio_to_segy(segy_spec=spec, input_path=zarr_tmp, output_path=segy_export_tmp)
369-
370-
# Check if file sizes match on IBM file.
371-
assert segy_input.stat().st_size == segy_export_tmp.stat().st_size
372-
373-
# IBM. Is random original traces and headers match round-trip file?
374-
in_segy = SegyFile(segy_input, spec=spec)
375-
out_segy = SegyFile(segy_export_tmp, spec=spec)
376-
377-
num_traces = in_segy.num_traces
378-
random_indices = rng.choice(num_traces, 100, replace=False)
379-
in_traces = in_segy.trace[random_indices]
380-
out_traces = out_segy.trace[random_indices]
381-
382-
assert in_segy.num_traces == out_segy.num_traces
383-
assert in_segy.text_header == out_segy.text_header
384-
assert in_segy.binary_header == out_segy.binary_header
385-
# TODO (Dmitriy Repin): Reconcile custom SegySpecs used in the roundtrip SEGY -> MDIO -> SEGY tests
386-
# https://github.com/TGSAI/mdio-python/issues/610
387-
npt.assert_array_equal(desired=in_traces.header, actual=out_traces.header)
388-
npt.assert_array_equal(desired=in_traces.sample, actual=out_traces.sample)

0 commit comments

Comments
 (0)