Skip to content

Commit 34a6046

Browse files
committed
refactor: update integration tests for v1 segy
1 parent bc07011 commit 34a6046

File tree

4 files changed

+15
-301
lines changed

4 files changed

+15
-301
lines changed

src/mdio/segy/creation.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,8 +119,11 @@ def mdio_spec_to_segy(
119119
spec.endianness = Endianness(output_endian)
120120
factory = make_segy_factory(ds, spec=spec)
121121

122-
text_str = attributes["textHeader"]
123-
text_bytes = factory.create_textual_header(text_str)
122+
text_field = attributes["textHeader"]
123+
if isinstance(text_field, list):
124+
text_field = "".join(text_field)
125+
126+
text_bytes = factory.create_textual_header(text_field)
124127

125128
binary_header = revision_encode(attributes["binaryHeader"], mdio_file_version)
126129
bin_hdr_bytes = factory.create_binary_header(binary_header)

tests/integration/test_segy_import_export.py

Lines changed: 7 additions & 243 deletions
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,6 @@
33
from __future__ import annotations
44

55
import json
6-
import os
76
from typing import TYPE_CHECKING
87

98
import dask
@@ -21,220 +20,17 @@
2120
from tests.integration.testing_helpers import validate_variable
2221

2322
from mdio import mdio_to_segy
24-
from mdio.converters.exceptions import GridTraceSparsityError
2523
from mdio.converters.segy import segy_to_mdio
2624
from mdio.core.storage_location import StorageLocation
2725
from mdio.schemas.v1.templates.template_registry import TemplateRegistry
2826
from mdio.segy.compat import mdio_segy_spec
29-
from mdio.segy.geometry import StreamerShotGeometryType
3027

3128
if TYPE_CHECKING:
3229
from pathlib import Path
3330

3431
dask.config.set(scheduler="synchronous")
3532

3633

37-
@pytest.mark.parametrize("index_bytes", [(17, 137)])
38-
@pytest.mark.parametrize("index_names", [("shot_point", "cable")])
39-
@pytest.mark.parametrize("index_types", [("int32", "int16")])
40-
@pytest.mark.parametrize("grid_overrides", [{"NonBinned": True, "chunksize": 2}, {"HasDuplicates": True}])
41-
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.C])
42-
class TestImport4DNonReg:
43-
"""Test for 4D segy import with grid overrides."""
44-
45-
def test_import_4d_segy( # noqa: PLR0913
46-
self,
47-
segy_mock_4d_shots: dict[StreamerShotGeometryType, Path],
48-
zarr_tmp: Path,
49-
index_bytes: tuple[int, ...],
50-
index_names: tuple[str, ...],
51-
index_types: tuple[str, ...],
52-
grid_overrides: dict[str, bool | int],
53-
chan_header_type: StreamerShotGeometryType,
54-
) -> None:
55-
"""Test importing a SEG-Y file to MDIO."""
56-
segy_path = segy_mock_4d_shots[chan_header_type]
57-
58-
segy_to_mdio(
59-
segy_path=segy_path,
60-
mdio_path_or_buffer=zarr_tmp.__str__(),
61-
index_bytes=index_bytes,
62-
index_names=index_names,
63-
index_types=index_types,
64-
chunksize=(8, 2, 10),
65-
overwrite=True,
66-
grid_overrides=grid_overrides,
67-
)
68-
69-
# Expected values
70-
num_samples = 25
71-
shots = [2, 3, 5, 6, 7, 8, 9]
72-
cables = [0, 101, 201, 301]
73-
receivers_per_cable = [1, 5, 7, 5]
74-
75-
# QC mdio output
76-
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
77-
attrs = ds.attrs["attributes"]
78-
assert attrs["binaryHeader"]["samples_per_trace"] == num_samples
79-
80-
assert list(ds[index_names[0]].values) == shots
81-
assert list(ds[index_names[1]].values) == cables
82-
assert list(ds["trace"].values) == list(range(1, np.amax(receivers_per_cable) + 1))
83-
sample_dim = ds["amplitude"].dims[-1]
84-
assert list(ds[sample_dim].values) == list(range(0, num_samples, 1))
85-
86-
87-
@pytest.mark.parametrize("index_bytes", [(17, 137, 13)])
88-
@pytest.mark.parametrize("index_names", [("shot_point", "cable", "channel")])
89-
@pytest.mark.parametrize("index_types", [("int32", "int16", "int32")])
90-
@pytest.mark.parametrize("grid_overrides", [{"AutoChannelWrap": True}, None])
91-
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B])
92-
class TestImport4D:
93-
"""Test for 4D segy import with grid overrides."""
94-
95-
def test_import_4d_segy( # noqa: PLR0913
96-
self,
97-
segy_mock_4d_shots: dict[StreamerShotGeometryType, Path],
98-
zarr_tmp: Path,
99-
index_bytes: tuple[int, ...],
100-
index_names: tuple[str, ...],
101-
index_types: tuple[str, ...],
102-
grid_overrides: dict[str, bool | int],
103-
chan_header_type: StreamerShotGeometryType,
104-
) -> None:
105-
"""Test importing a SEG-Y file to MDIO."""
106-
segy_path = segy_mock_4d_shots[chan_header_type]
107-
108-
segy_to_mdio(
109-
segy_path=segy_path,
110-
mdio_path_or_buffer=zarr_tmp.__str__(),
111-
index_bytes=index_bytes,
112-
index_names=index_names,
113-
index_types=index_types,
114-
chunksize=(8, 2, 128, 1024),
115-
overwrite=True,
116-
grid_overrides=grid_overrides,
117-
)
118-
119-
# Expected values
120-
num_samples = 25
121-
shots = [2, 3, 5, 6, 7, 8, 9]
122-
cables = [0, 101, 201, 301]
123-
receivers_per_cable = [1, 5, 7, 5]
124-
125-
# QC mdio output
126-
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
127-
attrs = ds.attrs["attributes"]
128-
assert attrs["binaryHeader"]["samples_per_trace"] == num_samples
129-
130-
assert list(ds[index_names[0]].values) == shots
131-
assert list(ds[index_names[1]].values) == cables
132-
133-
if chan_header_type == StreamerShotGeometryType.B and grid_overrides is None:
134-
assert list(ds[index_names[2]].values) == list(range(1, np.sum(receivers_per_cable) + 1))
135-
else:
136-
assert list(ds[index_names[2]].values) == list(range(1, np.amax(receivers_per_cable) + 1))
137-
138-
sample_dim = ds["amplitude"].dims[-1]
139-
assert list(ds[sample_dim].values) == list(range(0, num_samples, 1))
140-
141-
142-
@pytest.mark.parametrize("index_bytes", [(17, 137, 13)])
143-
@pytest.mark.parametrize("index_names", [("shot_point", "cable", "channel")])
144-
@pytest.mark.parametrize("index_types", [("int32", "int16", "int32")])
145-
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A])
146-
class TestImport4DSparse:
147-
"""Test for 4D segy import with grid overrides."""
148-
149-
def test_import_4d_segy( # noqa: PLR0913
150-
self,
151-
segy_mock_4d_shots: dict[StreamerShotGeometryType, Path],
152-
zarr_tmp: Path,
153-
index_bytes: tuple[int, ...],
154-
index_names: tuple[str, ...],
155-
index_types: tuple[str, ...],
156-
chan_header_type: StreamerShotGeometryType,
157-
) -> None:
158-
"""Test importing a SEG-Y file to MDIO."""
159-
segy_path = segy_mock_4d_shots[chan_header_type]
160-
os.environ["MDIO__GRID__SPARSITY_RATIO_LIMIT"] = "1.1"
161-
162-
with pytest.raises(GridTraceSparsityError) as execinfo:
163-
segy_to_mdio(
164-
segy_path=segy_path,
165-
mdio_path_or_buffer=zarr_tmp.__str__(),
166-
index_bytes=index_bytes,
167-
index_names=index_names,
168-
index_types=index_types,
169-
chunksize=(8, 2, 128, 1024),
170-
overwrite=True,
171-
)
172-
173-
os.environ["MDIO__GRID__SPARSITY_RATIO_LIMIT"] = "10"
174-
assert "This grid is very sparse and most likely user error with indexing." in str(execinfo.value)
175-
176-
177-
@pytest.mark.parametrize("index_bytes", [(133, 171, 17, 137, 13)])
178-
@pytest.mark.parametrize("index_names", [("shot_line", "gun", "shot_point", "cable", "channel")])
179-
@pytest.mark.parametrize("index_types", [("int16", "int16", "int32", "int16", "int32")])
180-
@pytest.mark.parametrize("grid_overrides", [{"AutoChannelWrap": True, "AutoShotWrap": True}, None])
181-
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B])
182-
class TestImport6D:
183-
"""Test for 6D segy import with grid overrides."""
184-
185-
def test_import_6d_segy( # noqa: PLR0913
186-
self,
187-
segy_mock_4d_shots: dict[StreamerShotGeometryType, Path],
188-
zarr_tmp: Path,
189-
index_bytes: tuple[int, ...],
190-
index_names: tuple[str, ...],
191-
index_types: tuple[str, ...],
192-
grid_overrides: dict[str, bool] | None,
193-
chan_header_type: StreamerShotGeometryType,
194-
) -> None:
195-
"""Test importing a SEG-Y file to MDIO."""
196-
segy_path = segy_mock_4d_shots[chan_header_type]
197-
198-
segy_to_mdio(
199-
segy_path=segy_path,
200-
mdio_path_or_buffer=zarr_tmp.__str__(),
201-
index_bytes=index_bytes,
202-
index_names=index_names,
203-
index_types=index_types,
204-
chunksize=(1, 1, 8, 1, 12, 36),
205-
overwrite=True,
206-
grid_overrides=grid_overrides,
207-
)
208-
209-
# Expected values
210-
num_samples = 25
211-
shots = [2, 3, 5, 6, 7, 8, 9] # original shot list
212-
if grid_overrides is not None and "AutoShotWrap" in grid_overrides:
213-
shots_new = [int(shot / 2) for shot in shots] # Updated shot index when ingesting with 2 guns
214-
shots_set = set(shots_new) # remove duplicates
215-
shots = list(shots_set) # Unique shot points for 6D indexed with gun
216-
cables = [0, 101, 201, 301]
217-
guns = [1, 2]
218-
receivers_per_cable = [1, 5, 7, 5]
219-
220-
# QC mdio output
221-
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
222-
attrs = ds.attrs["attributes"]
223-
assert attrs["binaryHeader"]["samples_per_trace"] == num_samples
224-
225-
assert list(ds[index_names[1]].values) == guns
226-
assert list(ds[index_names[2]].values) == shots
227-
assert list(ds[index_names[3]].values) == cables
228-
229-
if chan_header_type == StreamerShotGeometryType.B and grid_overrides is None:
230-
assert list(ds[index_names[4]].values) == list(range(1, np.sum(receivers_per_cable) + 1))
231-
else:
232-
assert list(ds[index_names[4]].values) == list(range(1, np.amax(receivers_per_cable) + 1))
233-
234-
sample_dim = ds["amplitude"].dims[-1]
235-
assert list(ds[sample_dim].values) == list(range(0, num_samples, 1))
236-
237-
23834
@pytest.mark.dependency
23935
@pytest.mark.parametrize("index_bytes", [(17, 13, 81, 85)])
24036
@pytest.mark.parametrize("index_names", [("inline", "crossline", "cdp_x", "cdp_y")])
@@ -270,16 +66,13 @@ class TestReader:
27066

27167
def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
27268
"""Metadata reading tests."""
273-
# NOTE: If mask_and_scale is not set,
274-
# Xarray will convert int to float and replace _FillValue with NaN
27569
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
27670
expected_attrs = {
27771
"apiVersion": "1.0.0a1",
27872
"createdOn": "2025-08-06 16:21:54.747880+00:00",
27973
"name": "PostStack3DTime",
28074
}
28175
actual_attrs_json = ds.attrs
282-
# compare one by one due to ever changing createdOn. For it, we only check existence
28376
for key, value in expected_attrs.items():
28477
assert key in actual_attrs_json
28578
if key == "createdOn":
@@ -290,21 +83,14 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
29083
attributes = ds.attrs["attributes"]
29184
assert attributes is not None
29285

293-
# Validate attributes provided by the template
29486
assert attributes["surveyDimensionality"] == "3D"
29587
assert attributes["ensembleType"] == "line"
29688
assert attributes["processingStage"] == "post-stack"
297-
298-
# Validate text header
29989
assert attributes["textHeader"] == text_header_teapot_dome()
300-
301-
# Validate binary header
30290
assert attributes["binaryHeader"] == binary_header_teapot_dome()
30391

30492
def test_meta_variable_read(self, zarr_tmp: Path) -> None:
30593
"""Metadata reading tests."""
306-
# NOTE: If mask_and_scale is not set,
307-
# Xarray will convert int to float and replace _FillValue with NaN
30894
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
30995
expected_attrs = {
31096
"count": 97354860,
@@ -319,26 +105,15 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None:
319105

320106
def test_grid(self, zarr_tmp: Path) -> None:
321107
"""Test validating MDIO variables."""
322-
# Load Xarray dataset from the MDIO file
323-
# NOTE: If mask_and_scale is not set,
324-
# Xarray will convert int to float and replace _FillValue with NaN
325108
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
326109

327-
# Note: in order to create the dataset we used the Time template, so the
328-
# sample dimension is called "time"
329-
330-
# Validate the dimension coordinate variables
331110
validate_variable(ds, "inline", (345,), ["inline"], np.int32, range(1, 346), get_values)
332111
validate_variable(ds, "crossline", (188,), ["crossline"], np.int32, range(1, 189), get_values)
333112
validate_variable(ds, "time", (1501,), ["time"], np.int32, range(0, 3002, 2), get_values)
334113

335-
# Validate the non-dimensional coordinate variables
336114
validate_variable(ds, "cdp_x", (345, 188), ["inline", "crossline"], np.float64, None, None)
337115
validate_variable(ds, "cdp_y", (345, 188), ["inline", "crossline"], np.float64, None, None)
338116

339-
# Validate the headers
340-
# We have a subset of headers since we used customize_segy_specs() providing the values only
341-
# for "inline", "crossline", "cdp_x", "cdp_y"
342117
data_type = np.dtype([("inline", "<i4"), ("crossline", "<i4"), ("cdp_x", "<i4"), ("cdp_y", "<i4")])
343118
validate_variable(
344119
ds,
@@ -350,10 +125,7 @@ def test_grid(self, zarr_tmp: Path) -> None:
350125
get_inline_header_values,
351126
)
352127

353-
# Validate the trace mask
354-
validate_variable(ds, "trace_mask", (345, 188), ["inline", "crossline"], np.bool, None, None)
355-
356-
# validate the amplitude data
128+
validate_variable(ds, "trace_mask", (345, 188), ["inline", "crossline"], np.bool_, None, None)
357129
validate_variable(
358130
ds,
359131
"amplitude",
@@ -366,52 +138,44 @@ def test_grid(self, zarr_tmp: Path) -> None:
366138

367139
def test_inline(self, zarr_tmp: Path) -> None:
368140
"""Read and compare every 75 inlines' mean and std. dev."""
369-
# NOTE: If mask_and_scale is not set,
370-
# Xarray will convert int to float and replace _FillValue with NaN
371141
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
372142
inlines = ds["amplitude"][::75, :, :]
373143
mean, std = inlines.mean(), inlines.std()
374144
npt.assert_allclose([mean, std], [1.0555277e-04, 6.0027051e-01])
375145

376146
def test_crossline(self, zarr_tmp: Path) -> None:
377147
"""Read and compare every 75 crosslines' mean and std. dev."""
378-
# NOTE: If mask_and_scale is not set,
379-
# Xarray will convert int to float and replace _FillValue with NaN
380148
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
381-
xlines = ds["amplitude"][:, ::75, :]
149+
xlines = ds["amplitude"][::, ::75, :]
382150
mean, std = xlines.mean(), xlines.std()
383-
384151
npt.assert_allclose([mean, std], [-5.0329847e-05, 5.9406823e-01])
385152

386153
def test_zslice(self, zarr_tmp: Path) -> None:
387154
"""Read and compare every 225 z-slices' mean and std. dev."""
388-
# NOTE: If mask_and_scale is not set,
389-
# Xarray will convert int to float and replace _FillValue with NaN
390155
ds = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
391-
slices = ds["amplitude"][:, :, ::225]
156+
slices = ds["amplitude"][::, ::, ::225]
392157
mean, std = slices.mean(), slices.std()
393158
npt.assert_allclose([mean, std], [0.005236923, 0.61279935])
394159

395160

396161
@pytest.mark.dependency("test_3d_import")
397162
class TestExport:
398-
"""Test SEG-Y exporting functionaliy."""
163+
"""Test SEG-Y exporting functionality."""
399164

400165
def test_3d_export(self, zarr_tmp: Path, segy_export_tmp: Path) -> None:
401-
"""Test 3D export to IBM and IEEE."""
166+
"""Export the ingested MDIO file back to SEG-Y."""
402167
mdio_to_segy(
403168
input_location=StorageLocation(zarr_tmp.__str__()),
404169
output_location=StorageLocation(segy_export_tmp.__str__()),
405170
)
406171

407172
def test_size_equal(self, segy_input: Path, segy_export_tmp: Path) -> None:
408-
"""Check if file sizes match on IBM file."""
173+
"""Confirm file sizes match after export."""
409174
assert segy_input.stat().st_size == segy_export_tmp.stat().st_size
410175

411176
def test_rand_equal(self, segy_input: Path, segy_export_tmp: Path) -> None:
412-
"""IBM. Is random original traces and headers match round-trip file?"""
177+
"""Verify trace data is preserved after round-trip export."""
413178
spec = mdio_segy_spec()
414-
415179
in_segy = SegyFile(segy_input, spec=spec)
416180
out_segy = SegyFile(segy_export_tmp, spec=spec)
417181

0 commit comments

Comments
 (0)