Skip to content

Commit aaf8fc6

Browse files
committed
Merge branch 'main' into v1
# Conflicts: # docs/requirements.txt # pyproject.toml # uv.lock
2 parents 5878e97 + 881bd35 commit aaf8fc6

File tree

8 files changed

+68
-42
lines changed

8 files changed

+68
-42
lines changed

.github/workflows/constraints.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
bump-my-version==1.1.2
2-
nox==2025.2.9
1+
bump-my-version==1.2.1
2+
nox==2025.5.1

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,6 @@ jobs:
130130
nox --session=coverage -- xml
131131
132132
- name: Upload coverage report
133-
uses: codecov/codecov-action@v5.1.2
133+
uses: codecov/codecov-action@v5.4.3
134134
env:
135135
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

docs/requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
autodoc-pydantic==2.2.0
2-
furo==2024.8.6
2+
furo==2025.7.19
33
linkify-it-py==2.0.3
4-
myst-nb==1.2.0
4+
myst-nb==1.3.0
55
sphinx==8.2.3
66
sphinx-click==6.0.0
77
sphinx-copybutton==0.5.2

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ docs = [
8787

8888

8989
[tool.uv]
90-
required-version = "0.6.11"
90+
required-version = ">=0.6.11"
9191

9292
[tool.ruff]
9393
target-version = "py311"

src/mdio/segy/_workers.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,31 +5,47 @@
55
import os
66
from typing import TYPE_CHECKING
77
from typing import Any
8+
from typing import TypedDict
89
from typing import cast
910

1011
import numpy as np
12+
from segy import SegyFile
1113

1214
if TYPE_CHECKING:
13-
from segy import SegyFile
1415
from segy.arrays import HeaderArray
16+
from segy.config import SegySettings
17+
from segy.schema import SegySpec
1518
from zarr import Array
1619

1720
from mdio.core import Grid
1821

1922

20-
def header_scan_worker(segy_file: SegyFile, trace_range: tuple[int, int]) -> HeaderArray:
23+
class SegyFileArguments(TypedDict):
24+
"""Keyword arguments used to create a SegyFile instance."""
25+
26+
url: str
27+
spec: SegySpec | None
28+
settings: SegySettings | None
29+
30+
31+
def header_scan_worker(
32+
segy_kw: SegyFileArguments,
33+
trace_range: tuple[int, int],
34+
) -> HeaderArray:
2135
"""Header scan worker.
2236
2337
The SegyFile instance is created inside the worker from the given keyword arguments
2438
(URL, spec, and settings) rather than being passed in as an already-open handle.
2539
2640
Args:
27-
segy_file: SegyFile instance.
41+
segy_kw: Arguments to open SegyFile instance.
2842
trace_range: Tuple consisting of the trace ranges to read.
2943
3044
Returns:
3145
HeaderArray parsed from SEG-Y library.
3246
"""
47+
segy_file = SegyFile(**segy_kw)
48+
3349
slice_ = slice(*trace_range)
3450

3551
cloud_native_mode = os.getenv("MDIO__IMPORT__CLOUD_NATIVE", default="False")
@@ -52,7 +68,7 @@ def header_scan_worker(segy_file: SegyFile, trace_range: tuple[int, int]) -> Hea
5268

5369

5470
def trace_worker(
55-
segy_file: SegyFile,
71+
segy_kw: SegyFileArguments,
5672
data_array: Array,
5773
metadata_array: Array,
5874
grid: Grid,
@@ -68,7 +84,7 @@ def trace_worker(
6884
slices across the sample dimension since SEG-Y data isn't chunked, eliminating concern.
6985
7086
Args:
71-
segy_file: SegyFile instance.
87+
segy_kw: Arguments to open SegyFile instance.
7288
data_array: Handle for zarr.Array we are writing traces to
7389
metadata_array: Handle for zarr.Array we are writing trace headers
7490
grid: mdio.Grid instance
@@ -78,6 +94,7 @@ def trace_worker(
7894
Partial statistics for chunk, or None
7995
"""
8096
# Special case where there are no traces inside chunk.
97+
segy_file = SegyFile(**segy_kw)
8198
live_subset = grid.live_mask[chunk_indices[:-1]]
8299

83100
if np.count_nonzero(live_subset) == 0:

src/mdio/segy/blocked_io.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from __future__ import annotations
44

5-
import multiprocessing as mp
65
import os
76
from concurrent.futures import ProcessPoolExecutor
87
from itertools import repeat
@@ -48,22 +47,23 @@ def to_zarr(segy_file: SegyFile, grid: Grid, data_array: Array, header_array: Ar
4847
chunker = ChunkIterator(data_array, chunk_samples=False)
4948
num_chunks = len(chunker)
5049

51-
# For Unix async writes with s3fs/fsspec & multiprocessing, use 'spawn' instead of default
52-
# 'fork' to avoid deadlocks on cloud stores. Slower but necessary. Default on Windows.
5350
num_cpus = int(os.getenv("MDIO__IMPORT__CPU_COUNT", default_cpus))
5451
num_workers = min(num_chunks, num_cpus)
55-
context = mp.get_context("spawn")
56-
executor = ProcessPoolExecutor(max_workers=num_workers, mp_context=context)
5752

5853
# Chunksize here is for multiprocessing, not Zarr chunksize.
5954
pool_chunksize, extra = divmod(num_chunks, num_workers * 4)
6055
pool_chunksize += 1 if extra else pool_chunksize
6156

57+
segy_kw = {
58+
"url": segy_file.fs.unstrip_protocol(segy_file.url),
59+
"spec": segy_file.spec,
60+
"settings": segy_file.settings,
61+
}
6262
tqdm_kw = {"unit": "block", "dynamic_ncols": True}
63-
with executor:
63+
with ProcessPoolExecutor(max_workers=num_workers) as executor:
6464
lazy_work = executor.map(
6565
trace_worker, # fn
66-
repeat(segy_file),
66+
repeat(segy_kw),
6767
repeat(data_array),
6868
repeat(header_array),
6969
repeat(grid),

src/mdio/segy/parsers.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22

33
from __future__ import annotations
44

5-
import multiprocessing as mp
65
import os
76
from concurrent.futures import ProcessPoolExecutor
87
from itertools import repeat
@@ -48,15 +47,17 @@ def parse_index_headers(
4847

4948
trace_ranges.append((start, stop))
5049

51-
# For Unix async reads with s3fs/fsspec & multiprocessing, use 'spawn' instead of default
52-
# 'fork' to avoid deadlocks on cloud stores. Slower but necessary. Default on Windows.
5350
num_cpus = int(os.getenv("MDIO__IMPORT__CPU_COUNT", default_cpus))
5451
num_workers = min(n_blocks, num_cpus)
55-
context = mp.get_context("spawn")
5652

53+
segy_kw = {
54+
"url": segy_file.fs.unstrip_protocol(segy_file.url),
55+
"spec": segy_file.spec,
56+
"settings": segy_file.settings,
57+
}
5758
tqdm_kw = {"unit": "block", "dynamic_ncols": True}
58-
with ProcessPoolExecutor(num_workers, mp_context=context) as executor:
59-
lazy_work = executor.map(header_scan_worker, repeat(segy_file), trace_ranges)
59+
with ProcessPoolExecutor(num_workers) as executor:
60+
lazy_work = executor.map(header_scan_worker, repeat(segy_kw), trace_ranges)
6061

6162
if progress_bar is True:
6263
lazy_work = tqdm(

tests/integration/test_segy_import_export_masked.py

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,7 @@ def __iter__(self) -> Iterable[MaskedExportConfigTypes]:
145145
# fmt: on
146146

147147

148-
def mock_nd_segy(
149-
path: str,
150-
grid_conf: GridConfig,
151-
segy_factory_conf: SegyFactoryConfig,
152-
) -> None:
148+
def mock_nd_segy(path: str, grid_conf: GridConfig, segy_factory_conf: SegyFactoryConfig) -> None:
153149
"""Create a fake SEG-Y file with a multidimensional grid."""
154150
spec = get_segy_standard(segy_factory_conf.revision)
155151

@@ -161,6 +157,19 @@ def mock_nd_segy(
161157
header_flds.append(HeaderField(name="samples_per_trace", byte=115, format="int16"))
162158
header_flds.append(HeaderField(name="sample_interval", byte=117, format="int16"))
163159

160+
# Add coordinates: {SRC-REC-CDP}-X/Y
161+
header_flds.extend(
162+
[
163+
HeaderField(name="coord_scalar", byte=71, format="int16"),
164+
HeaderField(name="src_x", byte=73, format="int32"),
165+
HeaderField(name="src_y", byte=77, format="int32"),
166+
HeaderField(name="rec_x", byte=81, format="int32"),
167+
HeaderField(name="rec_y", byte=85, format="int32"),
168+
HeaderField(name="cdp_x", byte=181, format="int32"),
169+
HeaderField(name="cdp_y", byte=185, format="int32"),
170+
]
171+
)
172+
164173
spec = spec.customize(trace_header_fields=header_flds)
165174
spec.segy_standard = segy_factory_conf.revision
166175
factory = SegyFactory(spec=spec, samples_per_trace=segy_factory_conf.num_samples)
@@ -177,9 +186,17 @@ def mock_nd_segy(
177186
samples = factory.create_trace_sample_template(trace_numbers.size)
178187
headers = factory.create_trace_header_template(trace_numbers.size)
179188

189+
# Fill dimension coordinates (e.g. inline, crossline, etc.)
180190
for dim_idx, dim in enumerate(grid_conf.dims):
181191
headers[dim.name] = dim_grid[dim_idx].ravel()
182192

193+
# Fill coordinates (e.g. {SRC-REC-CDP}-X/Y)
194+
headers["coord_scalar"] = -100
195+
for field in ["cdp_x", "src_x", "rec_x"]:
196+
headers[field] = np.random.randint(low=700000, high=900000, size=trace_numbers.size) * 100
197+
for field in ["cdp_y", "src_y", "rec_y"]:
198+
headers[field] = np.random.randint(low=4000000, high=5000000, size=trace_numbers.size) * 100
199+
183200
samples[:] = trace_numbers[..., None]
184201

185202
with fsspec.open(path, mode="wb") as fp:
@@ -188,10 +205,7 @@ def mock_nd_segy(
188205
fp.write(factory.create_traces(headers, samples))
189206

190207

191-
def generate_selection_mask(
192-
selection_conf: SelectionMaskConfig,
193-
grid_conf: GridConfig,
194-
) -> NDArray:
208+
def generate_selection_mask(selection_conf: SelectionMaskConfig, grid_conf: GridConfig) -> NDArray:
195209
"""Generate a boolean selection mask for a masked export test."""
196210
spatial_shape = [dim.size for dim in grid_conf.dims]
197211
mask_dims = selection_conf.mask_num_dims
@@ -224,9 +238,7 @@ def export_masked_path(tmp_path_factory: pytest.TempPathFactory) -> Path:
224238
class TestNdImportExport:
225239
"""Test import/export of n-D SEG-Ys to MDIO, with and without selection mask."""
226240

227-
def test_import(
228-
self, test_conf: MaskedExportConfig, export_masked_path: Path
229-
) -> None:
241+
def test_import(self, test_conf: MaskedExportConfig, export_masked_path: Path) -> None:
230242
"""Test import of an n-D SEG-Y file to MDIO."""
231243
grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf
232244

@@ -249,9 +261,7 @@ def test_import(
249261
overwrite=True,
250262
)
251263

252-
def test_export(
253-
self, test_conf: MaskedExportConfig, export_masked_path: Path
254-
) -> None:
264+
def test_export(self, test_conf: MaskedExportConfig, export_masked_path: Path) -> None:
255265
"""Test export of an n-D MDIO file back to SEG-Y."""
256266
grid_conf, segy_factory_conf, segy_to_mdio_conf, _ = test_conf
257267

@@ -277,9 +287,7 @@ def test_export(
277287
actual_sgy = SegyFile(segy_rt_path)
278288
assert_array_equal(actual_sgy.trace[:], expected_sgy.trace[:])
279289

280-
def test_export_masked(
281-
self, test_conf: MaskedExportConfig, export_masked_path: Path
282-
) -> None:
290+
def test_export_masked(self, test_conf: MaskedExportConfig, export_masked_path: Path) -> None:
283291
"""Test export of an n-D MDIO file back to SEG-Y with masked export."""
284292
grid_conf, segy_factory_conf, segy_to_mdio_conf, selection_conf = test_conf
285293

0 commit comments

Comments
 (0)