Skip to content

Commit 5e07a28

Browse files
tasansalAltay Sansal
andauthored
Add unit tests for exporting SEG-Y with nD grids and masked exports (TGSAI#512)
* Add integration tests for masked export of n-D SEG-Y to MDIO * Refactor variable names --------- Co-authored-by: Altay Sansal <[email protected]>
1 parent 3402224 commit 5e07a28

File tree

2 files changed

+301
-0
lines changed

2 files changed

+301
-0
lines changed

.flake8

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ rst-directives = deprecated
99

1010
per-file-ignores =
1111
tests/*:S101
12+
tests/integration/test_segy_import_export_masked.py:B950
1213
src/mdio/segy/_standards_rev0.py:B950
1314
src/mdio/segy/ebcdic.py:B950
1415
src/mdio/commands/segy.py:D301
Lines changed: 300 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,300 @@
1+
"""Test module for masked export of n-D SEG-Y files to MDIO.
2+
3+
We procedurally generate n-D SEG-Y files, import them and export both ways
4+
with and without selection masks. We then compare the resulting SEG-Y files
5+
to ensure they're identical to expected full or partial files.
6+
"""
7+
8+
from dataclasses import dataclass
9+
from pathlib import Path
10+
from typing import Iterable
11+
12+
import fsspec
13+
import numpy as np
14+
import pytest
15+
from numpy.testing import assert_array_equal
16+
from numpy.typing import NDArray
17+
from segy import SegyFile
18+
from segy.factory import SegyFactory
19+
from segy.schema import HeaderField
20+
from segy.standards import get_segy_standard
21+
22+
from mdio import MDIOReader
23+
from mdio import mdio_to_segy
24+
from mdio import segy_to_mdio
25+
from mdio.segy.utilities import segy_export_rechunker
26+
27+
28+
@dataclass
29+
class Dimension:
30+
"""Represents a single dimension for a multidimensional grid."""
31+
32+
name: str
33+
start: int
34+
size: int
35+
step: int
36+
37+
38+
@dataclass
39+
class GridConfig:
40+
"""Represents the configuration for a seismic grid."""
41+
42+
name: str
43+
dims: Iterable[Dimension]
44+
45+
46+
@dataclass
47+
class SegyFactoryConfig:
48+
"""Configuration class for SEG-Y creation with SegyFactory."""
49+
50+
revision: int | float
51+
header_byte_map: dict[str, int]
52+
num_samples: int
53+
54+
55+
@dataclass
56+
class SegyToMdioConfig:
57+
"""Configuration class for SEG-Y to MDIO conversion."""
58+
59+
chunks: Iterable[int]
60+
61+
62+
@dataclass
63+
class SelectionMaskConfig:
64+
"""Configuration class for masking out parts of the grid during export."""
65+
66+
mask_num_dims: int
67+
remove_frac: float | int
68+
69+
70+
@dataclass
71+
class MaskedExportConfig:
72+
"""Configuration class for a masked export test, combining above configs."""
73+
74+
grid_conf: GridConfig
75+
segy_factory_conf: SegyFactoryConfig
76+
segy_to_mdio_conf: SegyToMdioConfig
77+
selection_conf: SelectionMaskConfig
78+
79+
def __iter__(self):
80+
"""Allows for unpacking this dataclass in a test."""
81+
yield self.grid_conf
82+
yield self.segy_factory_conf
83+
yield self.segy_to_mdio_conf
84+
yield self.selection_conf
85+
86+
87+
# fmt: off
88+
STACK_2D_CONF = MaskedExportConfig(
89+
GridConfig(name="2d_stack", dims=[Dimension("cdp", 1, 1948, 1)]),
90+
SegyFactoryConfig(revision=1, header_byte_map={"cdp": 21}, num_samples=201),
91+
SegyToMdioConfig(chunks=[25, 128]),
92+
SelectionMaskConfig(mask_num_dims=1, remove_frac=0.998),
93+
)
94+
95+
STACK_3D_CONF = MaskedExportConfig(
96+
GridConfig(name="3d_stack", dims=[Dimension("inline", 10, 20, 1), Dimension("crossline", 100, 20, 2)]),
97+
SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193}, num_samples=201),
98+
SegyToMdioConfig(chunks=[6, 6, 6]),
99+
SelectionMaskConfig(mask_num_dims=2, remove_frac=0.98),
100+
)
101+
102+
GATHER_2D_CONF = MaskedExportConfig(
103+
GridConfig(name="2d_gather", dims=[Dimension("cdp", 1, 40, 1), Dimension("offset", 25, 20, 25)]),
104+
SegyFactoryConfig(revision=1, header_byte_map={"cdp": 21, "offset": 37}, num_samples=201),
105+
SegyToMdioConfig(chunks=[2, 12, 128]),
106+
SelectionMaskConfig(mask_num_dims=1, remove_frac=0.9),
107+
)
108+
109+
GATHER_3D_CONF = MaskedExportConfig(
110+
GridConfig(name="3d_gather", dims=[Dimension("inline", 10, 8, 1), Dimension("crossline", 100, 10, 2), Dimension("offset", 25, 10, 25)]),
111+
SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193, "offset": 37}, num_samples=201),
112+
SegyToMdioConfig(chunks=[4, 4, 2, 128]),
113+
SelectionMaskConfig(mask_num_dims=2, remove_frac=0.96),
114+
)
115+
116+
STREAMER_2D_CONF = MaskedExportConfig(
117+
GridConfig(name="2d_streamer", dims=[Dimension("shot", 10, 10, 1), Dimension("channel", 25, 60, 25)]),
118+
SegyFactoryConfig(revision=1, header_byte_map={"shot": 7, "channel": 131}, num_samples=201),
119+
SegyToMdioConfig(chunks=[2, 12, 128]),
120+
SelectionMaskConfig(mask_num_dims=1, remove_frac=0.7),
121+
)
122+
123+
STREAMER_3D_CONF = MaskedExportConfig(
124+
GridConfig(name="3d_streamer", dims=[Dimension("shot", 10, 5, 1), Dimension("cable", 1, 6, 1), Dimension("channel", 25, 60, 25)]),
125+
SegyFactoryConfig(revision=1, header_byte_map={"shot": 7, "cable": 193, "channel": 131}, num_samples=201),
126+
SegyToMdioConfig(chunks=[1, 2, 12, 128]),
127+
SelectionMaskConfig(mask_num_dims=1, remove_frac=0.5),
128+
)
129+
130+
COCA_3D_CONF = MaskedExportConfig(
131+
GridConfig(name="3d_coca", dims=[Dimension("inline", 10, 8, 1), Dimension("crossline", 100, 8, 2), Dimension("offset", 25, 15, 25), Dimension("azimuth", 0, 4, 30)]),
132+
SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193, "offset": 37, "azimuth": 181}, num_samples=201),
133+
SegyToMdioConfig(chunks=[4, 4, 4, 1, 128]),
134+
SelectionMaskConfig(mask_num_dims=2, remove_frac=0.9),
135+
)
136+
# fmt: on
137+
138+
139+
def mock_nd_segy(
140+
path: str,
141+
grid_conf: GridConfig,
142+
segy_factory_conf: SegyFactoryConfig,
143+
) -> None:
144+
"""Create a fake SEG-Y file with a multidimensional grid."""
145+
spec = get_segy_standard(segy_factory_conf.revision)
146+
147+
header_flds = []
148+
for dim in grid_conf.dims:
149+
byte_loc = segy_factory_conf.header_byte_map[dim.name]
150+
header_flds.append(HeaderField(name=dim.name, byte=byte_loc, format="int32"))
151+
152+
header_flds.append(HeaderField(name="samples_per_trace", byte=115, format="int16"))
153+
header_flds.append(HeaderField(name="sample_interval", byte=117, format="int16"))
154+
155+
spec = spec.customize(trace_header_fields=header_flds)
156+
spec.segy_standard = segy_factory_conf.revision
157+
factory = SegyFactory(spec=spec, samples_per_trace=segy_factory_conf.num_samples)
158+
159+
dim_coords = ()
160+
for dim in grid_conf.dims:
161+
start, size, step = dim.start, dim.size, dim.step
162+
stop = start + (size * step)
163+
dim_coords += (np.arange(start, stop, step),)
164+
165+
dim_grid = np.meshgrid(*dim_coords, indexing="ij")
166+
trace_numbers = np.arange(dim_grid[0].size) + 1
167+
168+
samples = factory.create_trace_sample_template(trace_numbers.size)
169+
headers = factory.create_trace_header_template(trace_numbers.size)
170+
171+
for dim_idx, dim in enumerate(grid_conf.dims):
172+
headers[dim.name] = dim_grid[dim_idx].ravel()
173+
174+
samples[:] = trace_numbers[..., None]
175+
176+
with fsspec.open(path, mode="wb") as fp:
177+
fp.write(factory.create_textual_header())
178+
fp.write(factory.create_binary_header())
179+
fp.write(factory.create_traces(headers, samples))
180+
181+
182+
def generate_selection_mask(
183+
selection_conf: SelectionMaskConfig,
184+
grid_conf: GridConfig,
185+
) -> NDArray:
186+
"""Generate a boolean selection mask for a masked export test."""
187+
spatial_shape = [dim.size for dim in grid_conf.dims]
188+
mask_dims = selection_conf.mask_num_dims
189+
mask_dim_shape = [dim.size for dim in grid_conf.dims[:mask_dims]]
190+
191+
selection_mask = np.zeros(shape=spatial_shape, dtype="bool")
192+
cut_axes = np.zeros(shape=mask_dim_shape, dtype="bool")
193+
194+
cut_size = int((1 - selection_conf.remove_frac) * cut_axes.size)
195+
rand_idx = np.random.choice(cut_axes.size, size=cut_size, replace=False)
196+
rand_idx = np.unravel_index(rand_idx, mask_dim_shape)
197+
selection_mask[rand_idx] = True
198+
199+
return selection_mask
200+
201+
202+
@pytest.fixture
203+
def export_masked_path(tmp_path_factory: pytest.TempPathFactory) -> Path:
204+
"""Fixture that generates temp directory for export tests."""
205+
return tmp_path_factory.getbasetemp() / "export_masked"
206+
207+
208+
# fmt: off
209+
@pytest.mark.parametrize(
210+
"test_conf",
211+
[STACK_2D_CONF, STACK_3D_CONF, GATHER_2D_CONF, GATHER_3D_CONF, STREAMER_2D_CONF, STREAMER_3D_CONF, COCA_3D_CONF],
212+
ids=["2d_stack", "3d_stack", "2d_gather", "3d_gather", "2d_streamer", "3d_streamer", "3d_coca"],
213+
)
214+
# fmt: on
215+
class TestNdImportExport:
216+
"""Test import/export of n-D SEG-Ys to MDIO, with and without selection mask."""
217+
218+
def test_import(
219+
self, test_conf: MaskedExportConfig, export_masked_path: Path
220+
) -> None:
221+
"""Test import of an n-D SEG-Y file to MDIO."""
222+
grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf
223+
224+
segy_path = export_masked_path / f"{grid_conf.name}.sgy"
225+
mdio_path = export_masked_path / f"{grid_conf.name}.mdio"
226+
print(mdio_path)
227+
228+
mock_nd_segy(segy_path, grid_conf, segy_factory_conf)
229+
230+
index_names = segy_factory_conf.header_byte_map.keys()
231+
index_bytes = segy_factory_conf.header_byte_map.values()
232+
chunksize = segy_to_mdio_conf.chunks
233+
234+
segy_to_mdio(
235+
segy_path.__str__(),
236+
mdio_path.__str__(),
237+
index_bytes,
238+
index_names,
239+
chunksize=chunksize,
240+
overwrite=True,
241+
)
242+
243+
def test_export(
244+
self, test_conf: MaskedExportConfig, export_masked_path: Path
245+
) -> None:
246+
"""Test export of an n-D MDIO file back to SEG-Y."""
247+
grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf
248+
249+
segy_path = export_masked_path / f"{grid_conf.name}.sgy"
250+
mdio_path = export_masked_path / f"{grid_conf.name}.mdio"
251+
segy_rt_path = export_masked_path / f"{grid_conf.name}_rt.sgy"
252+
253+
index_names = segy_factory_conf.header_byte_map.keys()
254+
access_pattern = "".join(map(str, range(len(index_names) + 1)))
255+
mdio = MDIOReader(mdio_path.__str__(), access_pattern=access_pattern)
256+
257+
chunks, shape = mdio.chunks, mdio.shape
258+
new_chunks = segy_export_rechunker(chunks, shape, dtype="float32", limit="0.3M")
259+
260+
mdio_to_segy(
261+
mdio_path.__str__(),
262+
segy_rt_path.__str__(),
263+
access_pattern=access_pattern,
264+
new_chunks=new_chunks,
265+
)
266+
267+
expected_sgy = SegyFile(segy_path)
268+
actual_sgy = SegyFile(segy_rt_path)
269+
assert_array_equal(actual_sgy.trace[:], expected_sgy.trace[:])
270+
271+
def test_export_masked(
272+
self, test_conf: MaskedExportConfig, export_masked_path: Path
273+
) -> None:
274+
"""Test export of an n-D MDIO file back to SEG-Y with masked export."""
275+
grid_conf, segy_factory_conf, segy_to_mdio_conf, selection_conf = test_conf
276+
277+
segy_path = export_masked_path / f"{grid_conf.name}.sgy"
278+
mdio_path = export_masked_path / f"{grid_conf.name}.mdio"
279+
segy_rt_path = export_masked_path / f"{grid_conf.name}_rt.sgy"
280+
281+
index_names = segy_factory_conf.header_byte_map.keys()
282+
access_pattern = "".join(map(str, range(len(index_names) + 1)))
283+
mdio = MDIOReader(mdio_path.__str__(), access_pattern=access_pattern)
284+
export_chunks = segy_export_rechunker(
285+
mdio.chunks, mdio.shape, dtype="float32", limit="0.3M"
286+
)
287+
selection_mask = generate_selection_mask(selection_conf, grid_conf)
288+
289+
mdio_to_segy(
290+
mdio_path.__str__(),
291+
segy_rt_path.__str__(),
292+
access_pattern=access_pattern,
293+
new_chunks=export_chunks,
294+
selection_mask=selection_mask,
295+
)
296+
297+
expected_trc_idx = selection_mask.ravel().nonzero()[0]
298+
expected_sgy = SegyFile(segy_path)
299+
actual_sgy = SegyFile(segy_rt_path)
300+
assert_array_equal(actual_sgy.trace[:], expected_sgy.trace[expected_trc_idx])

0 commit comments

Comments
 (0)