33from __future__ import annotations
44
55import json
6- import os
76from typing import TYPE_CHECKING
87
98import dask
2120from tests .integration .testing_helpers import validate_variable
2221
2322from mdio import mdio_to_segy
24- from mdio .converters .exceptions import GridTraceSparsityError
2523from mdio .converters .segy import segy_to_mdio
2624from mdio .core .storage_location import StorageLocation
2725from mdio .schemas .v1 .templates .template_registry import TemplateRegistry
2826from mdio .segy .compat import mdio_segy_spec
29- from mdio .segy .geometry import StreamerShotGeometryType
3027
3128if TYPE_CHECKING :
3229 from pathlib import Path
3330
3431dask .config .set (scheduler = "synchronous" )
3532
3633
37- @pytest .mark .parametrize ("index_bytes" , [(17 , 137 )])
38- @pytest .mark .parametrize ("index_names" , [("shot_point" , "cable" )])
39- @pytest .mark .parametrize ("index_types" , [("int32" , "int16" )])
40- @pytest .mark .parametrize ("grid_overrides" , [{"NonBinned" : True , "chunksize" : 2 }, {"HasDuplicates" : True }])
41- @pytest .mark .parametrize ("chan_header_type" , [StreamerShotGeometryType .C ])
42- class TestImport4DNonReg :
43- """Test for 4D segy import with grid overrides."""
44-
45- def test_import_4d_segy ( # noqa: PLR0913
46- self ,
47- segy_mock_4d_shots : dict [StreamerShotGeometryType , Path ],
48- zarr_tmp : Path ,
49- index_bytes : tuple [int , ...],
50- index_names : tuple [str , ...],
51- index_types : tuple [str , ...],
52- grid_overrides : dict [str , bool | int ],
53- chan_header_type : StreamerShotGeometryType ,
54- ) -> None :
55- """Test importing a SEG-Y file to MDIO."""
56- segy_path = segy_mock_4d_shots [chan_header_type ]
57-
58- segy_to_mdio (
59- segy_path = segy_path ,
60- mdio_path_or_buffer = zarr_tmp .__str__ (),
61- index_bytes = index_bytes ,
62- index_names = index_names ,
63- index_types = index_types ,
64- chunksize = (8 , 2 , 10 ),
65- overwrite = True ,
66- grid_overrides = grid_overrides ,
67- )
68-
69- # Expected values
70- num_samples = 25
71- shots = [2 , 3 , 5 , 6 , 7 , 8 , 9 ]
72- cables = [0 , 101 , 201 , 301 ]
73- receivers_per_cable = [1 , 5 , 7 , 5 ]
74-
75- # QC mdio output
76- ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
77- attrs = ds .attrs ["attributes" ]
78- assert attrs ["binaryHeader" ]["samples_per_trace" ] == num_samples
79-
80- assert list (ds [index_names [0 ]].values ) == shots
81- assert list (ds [index_names [1 ]].values ) == cables
82- assert list (ds ["trace" ].values ) == list (range (1 , np .amax (receivers_per_cable ) + 1 ))
83- sample_dim = ds ["amplitude" ].dims [- 1 ]
84- assert list (ds [sample_dim ].values ) == list (range (0 , num_samples , 1 ))
85-
86-
87- @pytest .mark .parametrize ("index_bytes" , [(17 , 137 , 13 )])
88- @pytest .mark .parametrize ("index_names" , [("shot_point" , "cable" , "channel" )])
89- @pytest .mark .parametrize ("index_types" , [("int32" , "int16" , "int32" )])
90- @pytest .mark .parametrize ("grid_overrides" , [{"AutoChannelWrap" : True }, None ])
91- @pytest .mark .parametrize ("chan_header_type" , [StreamerShotGeometryType .A , StreamerShotGeometryType .B ])
92- class TestImport4D :
93- """Test for 4D segy import with grid overrides."""
94-
95- def test_import_4d_segy ( # noqa: PLR0913
96- self ,
97- segy_mock_4d_shots : dict [StreamerShotGeometryType , Path ],
98- zarr_tmp : Path ,
99- index_bytes : tuple [int , ...],
100- index_names : tuple [str , ...],
101- index_types : tuple [str , ...],
102- grid_overrides : dict [str , bool | int ],
103- chan_header_type : StreamerShotGeometryType ,
104- ) -> None :
105- """Test importing a SEG-Y file to MDIO."""
106- segy_path = segy_mock_4d_shots [chan_header_type ]
107-
108- segy_to_mdio (
109- segy_path = segy_path ,
110- mdio_path_or_buffer = zarr_tmp .__str__ (),
111- index_bytes = index_bytes ,
112- index_names = index_names ,
113- index_types = index_types ,
114- chunksize = (8 , 2 , 128 , 1024 ),
115- overwrite = True ,
116- grid_overrides = grid_overrides ,
117- )
118-
119- # Expected values
120- num_samples = 25
121- shots = [2 , 3 , 5 , 6 , 7 , 8 , 9 ]
122- cables = [0 , 101 , 201 , 301 ]
123- receivers_per_cable = [1 , 5 , 7 , 5 ]
124-
125- # QC mdio output
126- ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
127- attrs = ds .attrs ["attributes" ]
128- assert attrs ["binaryHeader" ]["samples_per_trace" ] == num_samples
129-
130- assert list (ds [index_names [0 ]].values ) == shots
131- assert list (ds [index_names [1 ]].values ) == cables
132-
133- if chan_header_type == StreamerShotGeometryType .B and grid_overrides is None :
134- assert list (ds [index_names [2 ]].values ) == list (range (1 , np .sum (receivers_per_cable ) + 1 ))
135- else :
136- assert list (ds [index_names [2 ]].values ) == list (range (1 , np .amax (receivers_per_cable ) + 1 ))
137-
138- sample_dim = ds ["amplitude" ].dims [- 1 ]
139- assert list (ds [sample_dim ].values ) == list (range (0 , num_samples , 1 ))
140-
141-
142- @pytest .mark .parametrize ("index_bytes" , [(17 , 137 , 13 )])
143- @pytest .mark .parametrize ("index_names" , [("shot_point" , "cable" , "channel" )])
144- @pytest .mark .parametrize ("index_types" , [("int32" , "int16" , "int32" )])
145- @pytest .mark .parametrize ("chan_header_type" , [StreamerShotGeometryType .A ])
146- class TestImport4DSparse :
147- """Test for 4D segy import with grid overrides."""
148-
149- def test_import_4d_segy ( # noqa: PLR0913
150- self ,
151- segy_mock_4d_shots : dict [StreamerShotGeometryType , Path ],
152- zarr_tmp : Path ,
153- index_bytes : tuple [int , ...],
154- index_names : tuple [str , ...],
155- index_types : tuple [str , ...],
156- chan_header_type : StreamerShotGeometryType ,
157- ) -> None :
158- """Test importing a SEG-Y file to MDIO."""
159- segy_path = segy_mock_4d_shots [chan_header_type ]
160- os .environ ["MDIO__GRID__SPARSITY_RATIO_LIMIT" ] = "1.1"
161-
162- with pytest .raises (GridTraceSparsityError ) as execinfo :
163- segy_to_mdio (
164- segy_path = segy_path ,
165- mdio_path_or_buffer = zarr_tmp .__str__ (),
166- index_bytes = index_bytes ,
167- index_names = index_names ,
168- index_types = index_types ,
169- chunksize = (8 , 2 , 128 , 1024 ),
170- overwrite = True ,
171- )
172-
173- os .environ ["MDIO__GRID__SPARSITY_RATIO_LIMIT" ] = "10"
174- assert "This grid is very sparse and most likely user error with indexing." in str (execinfo .value )
175-
176-
177- @pytest .mark .parametrize ("index_bytes" , [(133 , 171 , 17 , 137 , 13 )])
178- @pytest .mark .parametrize ("index_names" , [("shot_line" , "gun" , "shot_point" , "cable" , "channel" )])
179- @pytest .mark .parametrize ("index_types" , [("int16" , "int16" , "int32" , "int16" , "int32" )])
180- @pytest .mark .parametrize ("grid_overrides" , [{"AutoChannelWrap" : True , "AutoShotWrap" : True }, None ])
181- @pytest .mark .parametrize ("chan_header_type" , [StreamerShotGeometryType .A , StreamerShotGeometryType .B ])
182- class TestImport6D :
183- """Test for 6D segy import with grid overrides."""
184-
185- def test_import_6d_segy ( # noqa: PLR0913
186- self ,
187- segy_mock_4d_shots : dict [StreamerShotGeometryType , Path ],
188- zarr_tmp : Path ,
189- index_bytes : tuple [int , ...],
190- index_names : tuple [str , ...],
191- index_types : tuple [str , ...],
192- grid_overrides : dict [str , bool ] | None ,
193- chan_header_type : StreamerShotGeometryType ,
194- ) -> None :
195- """Test importing a SEG-Y file to MDIO."""
196- segy_path = segy_mock_4d_shots [chan_header_type ]
197-
198- segy_to_mdio (
199- segy_path = segy_path ,
200- mdio_path_or_buffer = zarr_tmp .__str__ (),
201- index_bytes = index_bytes ,
202- index_names = index_names ,
203- index_types = index_types ,
204- chunksize = (1 , 1 , 8 , 1 , 12 , 36 ),
205- overwrite = True ,
206- grid_overrides = grid_overrides ,
207- )
208-
209- # Expected values
210- num_samples = 25
211- shots = [2 , 3 , 5 , 6 , 7 , 8 , 9 ] # original shot list
212- if grid_overrides is not None and "AutoShotWrap" in grid_overrides :
213- shots_new = [int (shot / 2 ) for shot in shots ] # Updated shot index when ingesting with 2 guns
214- shots_set = set (shots_new ) # remove duplicates
215- shots = list (shots_set ) # Unique shot points for 6D indexed with gun
216- cables = [0 , 101 , 201 , 301 ]
217- guns = [1 , 2 ]
218- receivers_per_cable = [1 , 5 , 7 , 5 ]
219-
220- # QC mdio output
221- ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
222- attrs = ds .attrs ["attributes" ]
223- assert attrs ["binaryHeader" ]["samples_per_trace" ] == num_samples
224-
225- assert list (ds [index_names [1 ]].values ) == guns
226- assert list (ds [index_names [2 ]].values ) == shots
227- assert list (ds [index_names [3 ]].values ) == cables
228-
229- if chan_header_type == StreamerShotGeometryType .B and grid_overrides is None :
230- assert list (ds [index_names [4 ]].values ) == list (range (1 , np .sum (receivers_per_cable ) + 1 ))
231- else :
232- assert list (ds [index_names [4 ]].values ) == list (range (1 , np .amax (receivers_per_cable ) + 1 ))
233-
234- sample_dim = ds ["amplitude" ].dims [- 1 ]
235- assert list (ds [sample_dim ].values ) == list (range (0 , num_samples , 1 ))
236-
237-
23834@pytest .mark .dependency
23935@pytest .mark .parametrize ("index_bytes" , [(17 , 13 , 81 , 85 )])
24036@pytest .mark .parametrize ("index_names" , [("inline" , "crossline" , "cdp_x" , "cdp_y" )])
@@ -270,16 +66,13 @@ class TestReader:
27066
27167 def test_meta_dataset_read (self , zarr_tmp : Path ) -> None :
27268 """Metadata reading tests."""
273- # NOTE: If mask_and_scale is not set,
274- # Xarray will convert int to float and replace _FillValue with NaN
27569 ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
27670 expected_attrs = {
27771 "apiVersion" : "1.0.0a1" ,
27872 "createdOn" : "2025-08-06 16:21:54.747880+00:00" ,
27973 "name" : "PostStack3DTime" ,
28074 }
28175 actual_attrs_json = ds .attrs
282- # compare one by one due to ever changing createdOn. For it, we only check existence
28376 for key , value in expected_attrs .items ():
28477 assert key in actual_attrs_json
28578 if key == "createdOn" :
@@ -290,21 +83,14 @@ def test_meta_dataset_read(self, zarr_tmp: Path) -> None:
29083 attributes = ds .attrs ["attributes" ]
29184 assert attributes is not None
29285
293- # Validate attributes provided by the template
29486 assert attributes ["surveyDimensionality" ] == "3D"
29587 assert attributes ["ensembleType" ] == "line"
29688 assert attributes ["processingStage" ] == "post-stack"
297-
298- # Validate text header
29989 assert attributes ["textHeader" ] == text_header_teapot_dome ()
300-
301- # Validate binary header
30290 assert attributes ["binaryHeader" ] == binary_header_teapot_dome ()
30391
30492 def test_meta_variable_read (self , zarr_tmp : Path ) -> None :
30593 """Metadata reading tests."""
306- # NOTE: If mask_and_scale is not set,
307- # Xarray will convert int to float and replace _FillValue with NaN
30894 ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
30995 expected_attrs = {
31096 "count" : 97354860 ,
@@ -319,26 +105,15 @@ def test_meta_variable_read(self, zarr_tmp: Path) -> None:
319105
320106 def test_grid (self , zarr_tmp : Path ) -> None :
321107 """Test validating MDIO variables."""
322- # Load Xarray dataset from the MDIO file
323- # NOTE: If mask_and_scale is not set,
324- # Xarray will convert int to float and replace _FillValue with NaN
325108 ds = xr .open_dataset (zarr_tmp , engine = "zarr" , mask_and_scale = False )
326109
327- # Note: in order to create the dataset we used the Time template, so the
328- # sample dimension is called "time"
329-
330- # Validate the dimension coordinate variables
331110 validate_variable (ds , "inline" , (345 ,), ["inline" ], np .int32 , range (1 , 346 ), get_values )
332111 validate_variable (ds , "crossline" , (188 ,), ["crossline" ], np .int32 , range (1 , 189 ), get_values )
333112 validate_variable (ds , "time" , (1501 ,), ["time" ], np .int32 , range (0 , 3002 , 2 ), get_values )
334113
335- # Validate the non-dimensional coordinate variables
336114 validate_variable (ds , "cdp_x" , (345 , 188 ), ["inline" , "crossline" ], np .float64 , None , None )
337115 validate_variable (ds , "cdp_y" , (345 , 188 ), ["inline" , "crossline" ], np .float64 , None , None )
338116
339- # Validate the headers
340- # We have a subset of headers since we used customize_segy_specs() providing the values only
341- # for "inline", "crossline", "cdp_x", "cdp_y"
342117 data_type = np .dtype ([("inline" , "<i4" ), ("crossline" , "<i4" ), ("cdp_x" , "<i4" ), ("cdp_y" , "<i4" )])
343118 validate_variable (
344119 ds ,
@@ -350,10 +125,7 @@ def test_grid(self, zarr_tmp: Path) -> None:
350125 get_inline_header_values ,
351126 )
352127
353- # Validate the trace mask
354- validate_variable (ds , "trace_mask" , (345 , 188 ), ["inline" , "crossline" ], np .bool , None , None )
355-
356- # validate the amplitude data
128+ validate_variable (ds , "trace_mask" , (345 , 188 ), ["inline" , "crossline" ], np .bool_ , None , None )
357129 validate_variable (
358130 ds ,
359131 "amplitude" ,
@@ -366,52 +138,44 @@ def test_grid(self, zarr_tmp: Path) -> None:
366138
def test_inline(self, zarr_tmp: Path) -> None:
    """Compare mean and std. dev. of every 75th inline against reference values."""
    # mask_and_scale=False keeps the stored values as-is; without it xarray
    # converts int to float and replaces _FillValue with NaN.
    dataset = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
    amplitude = dataset["amplitude"]

    # Step of 75 along the first axis, everything along the other two.
    decimated = amplitude[::75, :, :]
    observed = [decimated.mean(), decimated.std()]
    expected = [1.0555277e-04, 6.0027051e-01]
    npt.assert_allclose(observed, expected)
375145
def test_crossline(self, zarr_tmp: Path) -> None:
    """Compare mean and std. dev. of every 75th crossline against reference values."""
    # mask_and_scale=False keeps the stored values as-is; without it xarray
    # converts int to float and replaces _FillValue with NaN.
    dataset = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
    amplitude = dataset["amplitude"]

    # Step of 75 along the second axis, everything along the other two.
    decimated = amplitude[:, ::75, :]
    observed = [decimated.mean(), decimated.std()]
    expected = [-5.0329847e-05, 5.9406823e-01]
    npt.assert_allclose(observed, expected)
385152
def test_zslice(self, zarr_tmp: Path) -> None:
    """Compare mean and std. dev. of every 225th z-slice against reference values."""
    # mask_and_scale=False keeps the stored values as-is; without it xarray
    # converts int to float and replaces _FillValue with NaN.
    dataset = xr.open_dataset(zarr_tmp, engine="zarr", mask_and_scale=False)
    amplitude = dataset["amplitude"]

    # Step of 225 along the last axis, everything along the first two.
    decimated = amplitude[:, :, ::225]
    observed = [decimated.mean(), decimated.std()]
    expected = [0.005236923, 0.61279935]
    npt.assert_allclose(observed, expected)
394159
395160
396161@pytest .mark .dependency ("test_3d_import" )
397162class TestExport :
398- """Test SEG-Y exporting functionaliy ."""
163+ """Test SEG-Y exporting functionality ."""
399164
def test_3d_export(self, zarr_tmp: Path, segy_export_tmp: Path) -> None:
    """Export the ingested MDIO file back to SEG-Y.

    Args:
        zarr_tmp: Path of the MDIO store used as the export input.
        segy_export_tmp: Path the exported SEG-Y file is written to.
    """
    # NOTE(review): the enclosing class is marked with
    # pytest.mark.dependency("test_3d_import"), i.e. the dependency is passed
    # positionally. pytest-dependency appears to read only the `name=` and
    # `depends=` keywords, so this marker may not enforce any ordering; confirm.

    # StorageLocation is given plain strings; use str() rather than calling
    # the __str__ dunder directly.
    mdio_to_segy(
        input_location=StorageLocation(str(zarr_tmp)),
        output_location=StorageLocation(str(segy_export_tmp)),
    )
406171
407172 def test_size_equal (self , segy_input : Path , segy_export_tmp : Path ) -> None :
408- """Check if file sizes match on IBM file ."""
173+ """Confirm file sizes match after export ."""
409174 assert segy_input .stat ().st_size == segy_export_tmp .stat ().st_size
410175
411176 def test_rand_equal (self , segy_input : Path , segy_export_tmp : Path ) -> None :
412- """IBM. Is random original traces and headers match round-trip file? """
177+ """Verify trace data is preserved after round-trip export. """
413178 spec = mdio_segy_spec ()
414-
415179 in_segy = SegyFile (segy_input , spec = spec )
416180 out_segy = SegyFile (segy_export_tmp , spec = spec )
417181
0 commit comments