|
| 1 | +"""Test module for masked export of n-D SEG-Y files to MDIO. |
| 2 | +
|
| 3 | +We procedurally generate n-D SEG-Y files, import them and export both ways |
| 4 | +with and without selection masks. We then compare the resulting SEG-Y files |
| 5 | +to ensure they're identical to expected full or partial files. |
| 6 | +""" |
| 7 | + |
| 8 | +from dataclasses import dataclass |
| 9 | +from pathlib import Path |
| 10 | +from typing import Iterable |
| 11 | + |
| 12 | +import fsspec |
| 13 | +import numpy as np |
| 14 | +import pytest |
| 15 | +from numpy.testing import assert_array_equal |
| 16 | +from numpy.typing import NDArray |
| 17 | +from segy import SegyFile |
| 18 | +from segy.factory import SegyFactory |
| 19 | +from segy.schema import HeaderField |
| 20 | +from segy.standards import get_segy_standard |
| 21 | + |
| 22 | +from mdio import MDIOReader |
| 23 | +from mdio import mdio_to_segy |
| 24 | +from mdio import segy_to_mdio |
| 25 | +from mdio.segy.utilities import segy_export_rechunker |
| 26 | + |
| 27 | + |
| 28 | +@dataclass |
| 29 | +class Dimension: |
| 30 | + """Represents a single dimension for a multidimensional grid.""" |
| 31 | + |
| 32 | + name: str |
| 33 | + start: int |
| 34 | + size: int |
| 35 | + step: int |
| 36 | + |
| 37 | + |
| 38 | +@dataclass |
| 39 | +class GridConfig: |
| 40 | + """Represents the configuration for a seismic grid.""" |
| 41 | + |
| 42 | + name: str |
| 43 | + dims: Iterable[Dimension] |
| 44 | + |
| 45 | + |
| 46 | +@dataclass |
| 47 | +class SegyFactoryConfig: |
| 48 | + """Configuration class for SEG-Y creation with SegyFactory.""" |
| 49 | + |
| 50 | + revision: int | float |
| 51 | + header_byte_map: dict[str, int] |
| 52 | + num_samples: int |
| 53 | + |
| 54 | + |
| 55 | +@dataclass |
| 56 | +class SegyToMdioConfig: |
| 57 | + """Configuration class for SEG-Y to MDIO conversion.""" |
| 58 | + |
| 59 | + chunks: Iterable[int] |
| 60 | + |
| 61 | + |
| 62 | +@dataclass |
| 63 | +class SelectionMaskConfig: |
| 64 | + """Configuration class for masking out parts of the grid during export.""" |
| 65 | + |
| 66 | + mask_num_dims: int |
| 67 | + remove_frac: float | int |
| 68 | + |
| 69 | + |
| 70 | +@dataclass |
| 71 | +class MaskedExportConfig: |
| 72 | + """Configuration class for a masked export test, combining above configs.""" |
| 73 | + |
| 74 | + grid_conf: GridConfig |
| 75 | + segy_factory_conf: SegyFactoryConfig |
| 76 | + segy_to_mdio_conf: SegyToMdioConfig |
| 77 | + selection_conf: SelectionMaskConfig |
| 78 | + |
| 79 | + def __iter__(self): |
| 80 | + """Allows for unpacking this dataclass in a test.""" |
| 81 | + yield self.grid_conf |
| 82 | + yield self.segy_factory_conf |
| 83 | + yield self.segy_to_mdio_conf |
| 84 | + yield self.selection_conf |
| 85 | + |
| 86 | + |
| 87 | +# fmt: off |
| 88 | +STACK_2D_CONF = MaskedExportConfig( |
| 89 | + GridConfig(name="2d_stack", dims=[Dimension("cdp", 1, 1948, 1)]), |
| 90 | + SegyFactoryConfig(revision=1, header_byte_map={"cdp": 21}, num_samples=201), |
| 91 | + SegyToMdioConfig(chunks=[25, 128]), |
| 92 | + SelectionMaskConfig(mask_num_dims=1, remove_frac=0.998), |
| 93 | +) |
| 94 | + |
| 95 | +STACK_3D_CONF = MaskedExportConfig( |
| 96 | + GridConfig(name="3d_stack", dims=[Dimension("inline", 10, 20, 1), Dimension("crossline", 100, 20, 2)]), |
| 97 | + SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193}, num_samples=201), |
| 98 | + SegyToMdioConfig(chunks=[6, 6, 6]), |
| 99 | + SelectionMaskConfig(mask_num_dims=2, remove_frac=0.98), |
| 100 | +) |
| 101 | + |
| 102 | +GATHER_2D_CONF = MaskedExportConfig( |
| 103 | + GridConfig(name="2d_gather", dims=[Dimension("cdp", 1, 40, 1), Dimension("offset", 25, 20, 25)]), |
| 104 | + SegyFactoryConfig(revision=1, header_byte_map={"cdp": 21, "offset": 37}, num_samples=201), |
| 105 | + SegyToMdioConfig(chunks=[2, 12, 128]), |
| 106 | + SelectionMaskConfig(mask_num_dims=1, remove_frac=0.9), |
| 107 | +) |
| 108 | + |
| 109 | +GATHER_3D_CONF = MaskedExportConfig( |
| 110 | + GridConfig(name="3d_gather", dims=[Dimension("inline", 10, 8, 1), Dimension("crossline", 100, 10, 2), Dimension("offset", 25, 10, 25)]), |
| 111 | + SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193, "offset": 37}, num_samples=201), |
| 112 | + SegyToMdioConfig(chunks=[4, 4, 2, 128]), |
| 113 | + SelectionMaskConfig(mask_num_dims=2, remove_frac=0.96), |
| 114 | +) |
| 115 | + |
| 116 | +STREAMER_2D_CONF = MaskedExportConfig( |
| 117 | + GridConfig(name="2d_streamer", dims=[Dimension("shot", 10, 10, 1), Dimension("channel", 25, 60, 25)]), |
| 118 | + SegyFactoryConfig(revision=1, header_byte_map={"shot": 7, "channel": 131}, num_samples=201), |
| 119 | + SegyToMdioConfig(chunks=[2, 12, 128]), |
| 120 | + SelectionMaskConfig(mask_num_dims=1, remove_frac=0.7), |
| 121 | +) |
| 122 | + |
| 123 | +STREAMER_3D_CONF = MaskedExportConfig( |
| 124 | + GridConfig(name="3d_streamer", dims=[Dimension("shot", 10, 5, 1), Dimension("cable", 1, 6, 1), Dimension("channel", 25, 60, 25)]), |
| 125 | + SegyFactoryConfig(revision=1, header_byte_map={"shot": 7, "cable": 193, "channel": 131}, num_samples=201), |
| 126 | + SegyToMdioConfig(chunks=[1, 2, 12, 128]), |
| 127 | + SelectionMaskConfig(mask_num_dims=1, remove_frac=0.5), |
| 128 | +) |
| 129 | + |
| 130 | +COCA_3D_CONF = MaskedExportConfig( |
| 131 | + GridConfig(name="3d_coca", dims=[Dimension("inline", 10, 8, 1), Dimension("crossline", 100, 8, 2), Dimension("offset", 25, 15, 25), Dimension("azimuth", 0, 4, 30)]), |
| 132 | + SegyFactoryConfig(revision=1, header_byte_map={"inline": 189, "crossline": 193, "offset": 37, "azimuth": 181}, num_samples=201), |
| 133 | + SegyToMdioConfig(chunks=[4, 4, 4, 1, 128]), |
| 134 | + SelectionMaskConfig(mask_num_dims=2, remove_frac=0.9), |
| 135 | +) |
| 136 | +# fmt: on |
| 137 | + |
| 138 | + |
| 139 | +def mock_nd_segy( |
| 140 | + path: str, |
| 141 | + grid_conf: GridConfig, |
| 142 | + segy_factory_conf: SegyFactoryConfig, |
| 143 | +) -> None: |
| 144 | + """Create a fake SEG-Y file with a multidimensional grid.""" |
| 145 | + spec = get_segy_standard(segy_factory_conf.revision) |
| 146 | + |
| 147 | + header_flds = [] |
| 148 | + for dim in grid_conf.dims: |
| 149 | + byte_loc = segy_factory_conf.header_byte_map[dim.name] |
| 150 | + header_flds.append(HeaderField(name=dim.name, byte=byte_loc, format="int32")) |
| 151 | + |
| 152 | + header_flds.append(HeaderField(name="samples_per_trace", byte=115, format="int16")) |
| 153 | + header_flds.append(HeaderField(name="sample_interval", byte=117, format="int16")) |
| 154 | + |
| 155 | + spec = spec.customize(trace_header_fields=header_flds) |
| 156 | + spec.segy_standard = segy_factory_conf.revision |
| 157 | + factory = SegyFactory(spec=spec, samples_per_trace=segy_factory_conf.num_samples) |
| 158 | + |
| 159 | + dim_coords = () |
| 160 | + for dim in grid_conf.dims: |
| 161 | + start, size, step = dim.start, dim.size, dim.step |
| 162 | + stop = start + (size * step) |
| 163 | + dim_coords += (np.arange(start, stop, step),) |
| 164 | + |
| 165 | + dim_grid = np.meshgrid(*dim_coords, indexing="ij") |
| 166 | + trace_numbers = np.arange(dim_grid[0].size) + 1 |
| 167 | + |
| 168 | + samples = factory.create_trace_sample_template(trace_numbers.size) |
| 169 | + headers = factory.create_trace_header_template(trace_numbers.size) |
| 170 | + |
| 171 | + for dim_idx, dim in enumerate(grid_conf.dims): |
| 172 | + headers[dim.name] = dim_grid[dim_idx].ravel() |
| 173 | + |
| 174 | + samples[:] = trace_numbers[..., None] |
| 175 | + |
| 176 | + with fsspec.open(path, mode="wb") as fp: |
| 177 | + fp.write(factory.create_textual_header()) |
| 178 | + fp.write(factory.create_binary_header()) |
| 179 | + fp.write(factory.create_traces(headers, samples)) |
| 180 | + |
| 181 | + |
| 182 | +def generate_selection_mask( |
| 183 | + selection_conf: SelectionMaskConfig, |
| 184 | + grid_conf: GridConfig, |
| 185 | +) -> NDArray: |
| 186 | + """Generate a boolean selection mask for a masked export test.""" |
| 187 | + spatial_shape = [dim.size for dim in grid_conf.dims] |
| 188 | + mask_dims = selection_conf.mask_num_dims |
| 189 | + mask_dim_shape = [dim.size for dim in grid_conf.dims[:mask_dims]] |
| 190 | + |
| 191 | + selection_mask = np.zeros(shape=spatial_shape, dtype="bool") |
| 192 | + cut_axes = np.zeros(shape=mask_dim_shape, dtype="bool") |
| 193 | + |
| 194 | + cut_size = int((1 - selection_conf.remove_frac) * cut_axes.size) |
| 195 | + rand_idx = np.random.choice(cut_axes.size, size=cut_size, replace=False) |
| 196 | + rand_idx = np.unravel_index(rand_idx, mask_dim_shape) |
| 197 | + selection_mask[rand_idx] = True |
| 198 | + |
| 199 | + return selection_mask |
| 200 | + |
| 201 | + |
| 202 | +@pytest.fixture |
| 203 | +def export_masked_path(tmp_path_factory: pytest.TempPathFactory) -> Path: |
| 204 | + """Fixture that generates temp directory for export tests.""" |
| 205 | + return tmp_path_factory.getbasetemp() / "export_masked" |
| 206 | + |
| 207 | + |
| 208 | +# fmt: off |
| 209 | +@pytest.mark.parametrize( |
| 210 | + "test_conf", |
| 211 | + [STACK_2D_CONF, STACK_3D_CONF, GATHER_2D_CONF, GATHER_3D_CONF, STREAMER_2D_CONF, STREAMER_3D_CONF, COCA_3D_CONF], |
| 212 | + ids=["2d_stack", "3d_stack", "2d_gather", "3d_gather", "2d_streamer", "3d_streamer", "3d_coca"], |
| 213 | +) |
| 214 | +# fmt: on |
| 215 | +class TestNdImportExport: |
| 216 | + """Test import/export of n-D SEG-Ys to MDIO, with and without selection mask.""" |
| 217 | + |
| 218 | + def test_import( |
| 219 | + self, test_conf: MaskedExportConfig, export_masked_path: Path |
| 220 | + ) -> None: |
| 221 | + """Test import of an n-D SEG-Y file to MDIO.""" |
| 222 | + grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf |
| 223 | + |
| 224 | + segy_path = export_masked_path / f"{grid_conf.name}.sgy" |
| 225 | + mdio_path = export_masked_path / f"{grid_conf.name}.mdio" |
| 226 | + print(mdio_path) |
| 227 | + |
| 228 | + mock_nd_segy(segy_path, grid_conf, segy_factory_conf) |
| 229 | + |
| 230 | + index_names = segy_factory_conf.header_byte_map.keys() |
| 231 | + index_bytes = segy_factory_conf.header_byte_map.values() |
| 232 | + chunksize = segy_to_mdio_conf.chunks |
| 233 | + |
| 234 | + segy_to_mdio( |
| 235 | + segy_path.__str__(), |
| 236 | + mdio_path.__str__(), |
| 237 | + index_bytes, |
| 238 | + index_names, |
| 239 | + chunksize=chunksize, |
| 240 | + overwrite=True, |
| 241 | + ) |
| 242 | + |
| 243 | + def test_export( |
| 244 | + self, test_conf: MaskedExportConfig, export_masked_path: Path |
| 245 | + ) -> None: |
| 246 | + """Test export of an n-D MDIO file back to SEG-Y.""" |
| 247 | + grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf |
| 248 | + |
| 249 | + segy_path = export_masked_path / f"{grid_conf.name}.sgy" |
| 250 | + mdio_path = export_masked_path / f"{grid_conf.name}.mdio" |
| 251 | + segy_rt_path = export_masked_path / f"{grid_conf.name}_rt.sgy" |
| 252 | + |
| 253 | + index_names = segy_factory_conf.header_byte_map.keys() |
| 254 | + access_pattern = "".join(map(str, range(len(index_names) + 1))) |
| 255 | + mdio = MDIOReader(mdio_path.__str__(), access_pattern=access_pattern) |
| 256 | + |
| 257 | + chunks, shape = mdio.chunks, mdio.shape |
| 258 | + new_chunks = segy_export_rechunker(chunks, shape, dtype="float32", limit="0.3M") |
| 259 | + |
| 260 | + mdio_to_segy( |
| 261 | + mdio_path.__str__(), |
| 262 | + segy_rt_path.__str__(), |
| 263 | + access_pattern=access_pattern, |
| 264 | + new_chunks=new_chunks, |
| 265 | + ) |
| 266 | + |
| 267 | + expected_sgy = SegyFile(segy_path) |
| 268 | + actual_sgy = SegyFile(segy_rt_path) |
| 269 | + assert_array_equal(actual_sgy.trace[:], expected_sgy.trace[:]) |
| 270 | + |
| 271 | + def test_export_masked( |
| 272 | + self, test_conf: MaskedExportConfig, export_masked_path: Path |
| 273 | + ) -> None: |
| 274 | + """Test export of an n-D MDIO file back to SEG-Y with masked export.""" |
| 275 | + grid_conf, segy_factory_conf, segy_to_mdio_conf, selection_conf = test_conf |
| 276 | + |
| 277 | + segy_path = export_masked_path / f"{grid_conf.name}.sgy" |
| 278 | + mdio_path = export_masked_path / f"{grid_conf.name}.mdio" |
| 279 | + segy_rt_path = export_masked_path / f"{grid_conf.name}_rt.sgy" |
| 280 | + |
| 281 | + index_names = segy_factory_conf.header_byte_map.keys() |
| 282 | + access_pattern = "".join(map(str, range(len(index_names) + 1))) |
| 283 | + mdio = MDIOReader(mdio_path.__str__(), access_pattern=access_pattern) |
| 284 | + export_chunks = segy_export_rechunker( |
| 285 | + mdio.chunks, mdio.shape, dtype="float32", limit="0.3M" |
| 286 | + ) |
| 287 | + selection_mask = generate_selection_mask(selection_conf, grid_conf) |
| 288 | + |
| 289 | + mdio_to_segy( |
| 290 | + mdio_path.__str__(), |
| 291 | + segy_rt_path.__str__(), |
| 292 | + access_pattern=access_pattern, |
| 293 | + new_chunks=export_chunks, |
| 294 | + selection_mask=selection_mask, |
| 295 | + ) |
| 296 | + |
| 297 | + expected_trc_idx = selection_mask.ravel().nonzero()[0] |
| 298 | + expected_sgy = SegyFile(segy_path) |
| 299 | + actual_sgy = SegyFile(segy_rt_path) |
| 300 | + assert_array_equal(actual_sgy.trace[:], expected_sgy.trace[expected_trc_idx]) |
0 commit comments