|
| 1 | +"""Integration test for MDIO rechunking. |
| 2 | +
|
| 3 | +This test creates a fake 3D SEG‑Y file with a 3×4 grid (3 inlines and 4 crosslines) |
| 4 | +with 100 samples per trace. Each trace header stores its inline and crossline numbers. |
| 5 | +It then converts the SEG‑Y file to MDIO, reads the original data arrays, performs a |
| 6 | +rechunk operation via the convenience API, and finally validates that the data in the |
| 7 | +new rechunked arrays exactly matches the original MDIO data. |
| 8 | +""" |
| 9 | + |
| 10 | +import struct |
| 11 | + |
| 12 | +import numpy as np |
| 13 | +import pytest |
| 14 | + |
| 15 | +from mdio.api import convenience |
| 16 | +from mdio.api.accessor import MDIOAccessor |
| 17 | +from mdio.converters import segy_to_mdio |
| 18 | + |
| 19 | + |
def create_fake_segy_3d(
    file_path,
    num_inlines=3,
    num_crosslines=4,
    samples_per_trace=100,
):
    """Write a minimal synthetic 3D SEG-Y file to ``file_path``.

    The file consists of a blank 3200-byte textual header, a 400-byte binary
    header and ``num_inlines * num_crosslines`` traces. Every trace is a
    240-byte header followed by ``samples_per_trace`` big-endian IEEE float32
    samples. Traces are written inline-major, with inline and crossline
    numbers both starting at 1.

    Args:
        file_path: Destination path; the file is created or truncated.
        num_inlines: Number of inlines in the grid.
        num_crosslines: Number of crosslines in the grid.
        samples_per_trace: Samples in every trace. The same value is written
            to the binary header, whose field is an unsigned 16-bit integer.

    Sample ``k`` of the trace at (inline, crossline) holds
    ``inline * 10 + crossline + 1 + 0.002 * k``.
    """
    # Binary header (400 bytes); all fields are big-endian.
    bin_header = bytearray(400)
    # Sample interval in microseconds, bytes 17-18 (0-indexed 16:18).
    bin_header[16:18] = struct.pack(">H", 1000)
    # Samples per trace, bytes 21-22 (0-indexed 20:22). Derived from the
    # argument so the header always agrees with the trace payload below.
    bin_header[20:22] = struct.pack(">H", samples_per_trace)
    # Data sample format code, bytes 25-26 (0-indexed 24:26): 5 = IEEE float.
    bin_header[24:26] = struct.pack(">H", 5)
    # Keep bytes 96:100 at zero so the explicit endianness code is 0 and the
    # SEG-Y library falls back to the legacy detection method.
    bin_header[96:100] = b"\x00" * 4

    # Per-sample ramp shared by every trace; only the base value differs.
    sample_ramp = np.arange(samples_per_trace, dtype=np.float32) * 0.002

    with open(file_path, "wb") as f:
        # Textual header: 3200 bytes of spaces.
        f.write(b" " * 3200)
        f.write(bin_header)

        for inline in range(1, num_inlines + 1):
            for crossline in range(1, num_crosslines + 1):
                # 240-byte trace header. The SEG-Y standard puts the inline
                # number at bytes 189-192 and the crossline number at bytes
                # 193-196 (1-based), hence the 0-based slices below.
                header = bytearray(240)
                header[188:192] = struct.pack(">i", inline)
                header[192:196] = struct.pack(">i", crossline)
                f.write(header)

                # Each IL/XL pair gets its own base value so that traces are
                # distinguishable; samples within a trace step by 0.002.
                trace_samples = sample_ramp + (inline * 10 + crossline + 1)
                # Samples are stored as big-endian IEEE float32.
                f.write(trace_samples.astype(">f4").tobytes())
| 68 | + |
| 69 | + |
@pytest.fixture
def segy_file(tmp_path):
    """Write the synthetic 3D SEG-Y file into the temp directory and return its path."""
    target = tmp_path / "fake3d.sgy"
    create_fake_segy_3d(target)
    return target
| 76 | + |
| 77 | + |
@pytest.fixture
def mdio_path(tmp_path):
    """Return the path inside the temp directory to use for the MDIO output.

    Nothing is created here; the fixture only builds the path.
    """
    return tmp_path.joinpath("test.mdio")
| 82 | + |
| 83 | + |
def test_rechunk_integration(segy_file, mdio_path):
    """Check that rechunking leaves trace and header data unchanged.

    Steps:
        1. Ingest the fake 3D SEG-Y file into an MDIO file.
        2. Snapshot the trace and header arrays through the "012" access pattern.
        3. Rechunk through the convenience API using the "sample" suffix.
        4. Reopen with the "sample" access pattern and require both arrays to
           equal the snapshots exactly.
    """
    store = str(mdio_path)

    # Inline and crossline are indexed from trace-header bytes 189 and 193.
    segy_to_mdio(
        segy_path=str(segy_file),
        mdio_path_or_buffer=store,
        index_bytes=(189, 193),
        index_names=("inline", "crossline"),
        chunksize=(2, 2, 100),
        overwrite=True,
    )

    # Options common to both accessors; only the access pattern differs.
    accessor_options = {
        "mode": "r+",
        "storage_options": None,
        "return_metadata": False,
        "new_chunks": None,
        "backend": "zarr",
        "memory_cache_size": 0,
        "disk_cache": False,
    }

    # Snapshot the data as ingested, before any rechunking happens.
    reader = MDIOAccessor(store, access_pattern="012", **accessor_options)
    original_traces = reader._traces[:]  # Main data array (3D: inline, crossline, samples).
    original_headers = reader._headers[:]  # Header array.

    # The new chunk shape differs from the ingestion chunks (2, 2, 100) in
    # every dimension.
    new_chunk = (3, 4, 50)
    convenience.rechunk(reader, new_chunk, "sample", overwrite=True)

    # A fresh accessor is needed to read through the new access pattern.
    rechunked_reader = MDIOAccessor(store, access_pattern="sample", **accessor_options)
    rechunked_data = rechunked_reader._traces[:]
    rechunked_headers = rechunked_reader._headers[:]

    # Rechunking must not alter the stored values.
    np.testing.assert_array_equal(original_traces, rechunked_data)
    np.testing.assert_array_equal(original_headers, rechunked_headers)
0 commit comments