Skip to content

Commit 5ba71d5

Browse files
authored
Use spawn multiprocessing context for header scans (TGSAI#676)
* Fix warning: use of fork() may lead to deadlocks in the child
* Run pre-commit
1 parent 3c84eee commit 5ba71d5

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

src/mdio/segy/parsers.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import multiprocessing as mp
56
import os
67
from concurrent.futures import ProcessPoolExecutor
78
from itertools import repeat
@@ -58,7 +59,10 @@ def parse_headers(
5859
"settings": segy_file.settings,
5960
}
6061
tqdm_kw = {"unit": "block", "dynamic_ncols": True}
61-
with ProcessPoolExecutor(num_workers) as executor:
62+
# For Unix async writes with s3fs/fsspec & multiprocessing, use 'spawn' instead of default
63+
# 'fork' to avoid deadlocks on cloud stores. Slower but necessary. 'spawn' is already the default on Windows.
64+
context = mp.get_context("spawn")
65+
with ProcessPoolExecutor(num_workers, mp_context=context) as executor:
6266
lazy_work = executor.map(header_scan_worker, repeat(segy_kw), trace_ranges, repeat(subset))
6367

6468
if progress_bar is True:

0 commit comments

Comments
 (0)