
Commit 75bfa50

simplify mp code and remove wrappers

1 parent 7f9d580

3 files changed: 15 additions & 40 deletions

src/mdio/segy/_workers.py

Lines changed: 0 additions & 14 deletions

```diff
@@ -208,17 +208,3 @@ def trace_worker(
         max_val = tmp_data.max()
 
     return count, chunk_sum, chunk_sum_squares, min_val, max_val
-
-
-# tqdm only works properly with pool.map
-# However, we need pool.starmap because we have more than one
-# argument to make pool.map work with multiple arguments, we
-# wrap the function and consolidate arguments to one
-def trace_worker_wrapper(args):
-    """Wrapper to make it work with map and multiple arguments."""
-    return trace_worker(*args)
-
-
-def header_scan_worker_wrapper(args):
-    """Wrapper to make it work with map and multiple arguments."""
-    return header_scan_worker(*args)
```
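The deleted comment records why the wrappers existed: `multiprocessing.Pool.map` passes exactly one argument to the target function (and `tqdm` only tracks progress properly with `map`, not `starmap`), so multi-argument workers had to be wrapped behind a single-tuple interface. `concurrent.futures.Executor.map`, which the call sites below now rely on, instead accepts one iterable per parameter and zips them internally, which is what makes the wrappers removable. A minimal, self-contained sketch of the two patterns; the worker `work` and its arguments are hypothetical stand-ins, not MDIO code:

```python
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from multiprocessing import Pool


def work(path, index):
    """Stand-in for a multi-argument worker like trace_worker."""
    return f"{path}:{index}"


def work_wrapper(args):
    """Old pattern: unpack a single tuple so Pool.map can call work."""
    return work(*args)


if __name__ == "__main__":
    # Old: consolidate the arguments into tuples for the single-arg wrapper.
    with Pool(2) as pool:
        old = pool.map(work_wrapper, zip(repeat("file.segy"), range(4)))

    # New: Executor.map takes one iterable per parameter; no wrapper needed.
    with ProcessPoolExecutor(2) as executor:
        new = list(executor.map(work, repeat("file.segy"), range(4)))

    assert old == new == ["file.segy:0", "file.segy:1", "file.segy:2", "file.segy:3"]
```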

src/mdio/segy/blocked_io.py

Lines changed: 8 additions & 13 deletions

```diff
@@ -20,7 +20,7 @@
 
 from mdio.core import Grid
 from mdio.core.indexing import ChunkIterator
-from mdio.segy._workers import trace_worker_wrapper
+from mdio.segy._workers import trace_worker
 from mdio.segy.byte_utils import ByteOrder
 from mdio.segy.byte_utils import Dtype
 from mdio.segy.creation import concat_files
@@ -133,16 +133,6 @@ def to_zarr(
     chunker = ChunkIterator(trace_array, chunk_samples=False)
     num_chunks = len(chunker)
 
-    # Setting all multiprocessing parameters.
-    parallel_inputs = zip(  # noqa: B905
-        repeat(segy_path),
-        repeat(trace_array),
-        repeat(header_array),
-        repeat(grid),
-        chunker,
-        repeat(segy_endian),
-    )
-
     # For Unix async writes with s3fs/fsspec & multiprocessing,
     # use 'spawn' instead of default 'fork' to avoid deadlocks
     # on cloud stores. Slower but necessary. Default on Windows.
@@ -157,8 +147,13 @@
     tqdm_kw = dict(unit="block", dynamic_ncols=True)
     with executor:
        lazy_work = executor.map(
-            trace_worker_wrapper,  # fn
-            parallel_inputs,  # iterables
+            trace_worker,  # fn
+            repeat(segy_path),
+            repeat(trace_array),
+            repeat(header_array),
+            repeat(grid),
+            chunker,
+            repeat(segy_endian),
             chunksize=pool_chunksize,
         )
 
```
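The comment kept in this hunk explains that the executor is created on the 'spawn' start method to avoid fork-related deadlocks when writing to cloud stores through s3fs/fsspec. The executor construction itself is outside this diff; a minimal sketch of how such an executor is typically built (the worker count is an assumption):

```python
import multiprocessing as mp
from concurrent.futures import ProcessPoolExecutor

# 'spawn' starts fresh interpreter processes instead of forking, which
# avoids inheriting fsspec/s3fs event-loop state that can deadlock.
spawn_context = mp.get_context("spawn")
executor = ProcessPoolExecutor(max_workers=4, mp_context=spawn_context)
```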

src/mdio/segy/parsers.py

Lines changed: 7 additions & 13 deletions

```diff
@@ -15,7 +15,7 @@
 from tqdm.auto import tqdm
 
 from mdio.core import Dimension
-from mdio.segy._workers import header_scan_worker_wrapper
+from mdio.segy._workers import header_scan_worker
 
 
 NUM_CORES = cpu_count(logical=False)
@@ -104,24 +104,18 @@ def parse_trace_headers(
 
         trace_ranges.append((start, stop))
 
-    # Note: Make sure the order of this is exactly
-    # the same as the function call.
-    parallel_inputs = zip(  # noqa: B905 or strict=False >= py3.10
-        repeat(segy_path),
-        trace_ranges,
-        repeat(byte_locs),
-        repeat(byte_lengths),
-        repeat(segy_endian),
-    )
-
     num_workers = min(n_blocks, NUM_CORES)
 
     tqdm_kw = dict(unit="block", dynamic_ncols=True)
     with ProcessPoolExecutor(num_workers) as executor:
         # pool.imap is lazy
         lazy_work = executor.map(
-            header_scan_worker_wrapper,  # fn
-            parallel_inputs,  # iterables
+            header_scan_worker,  # fn
+            repeat(segy_path),
+            trace_ranges,
+            repeat(byte_locs),
+            repeat(byte_lengths),
+            repeat(segy_endian),
             chunksize=2,  # Not array chunks. This is for `multiprocessing`
         )
 
```
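The deleted "Note" about argument order is gone, but the constraint it described still holds: `Executor.map` matches its iterables to the worker's parameters positionally, so the call site must mirror the signature, with `repeat` supplying the per-task constants. A runnable sketch with a hypothetical stand-in for `header_scan_worker` (parameter order inferred from the call site; the byte locations, lengths, and endianness are placeholder values):

```python
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat


def header_scan_worker(segy_path, trace_range, byte_locs, byte_lengths, segy_endian):
    """Hypothetical stand-in; each parameter lines up with one iterable below."""
    start, stop = trace_range
    return f"{segy_path}[{start}:{stop}] locs={byte_locs} lens={byte_lengths} {segy_endian}"


if __name__ == "__main__":
    trace_ranges = [(0, 1000), (1000, 2000), (2000, 3000)]
    with ProcessPoolExecutor(2) as executor:
        lazy_work = executor.map(
            header_scan_worker,   # fn
            repeat("file.segy"),  # -> segy_path, constant across tasks
            trace_ranges,         # -> trace_range, varies per task
            repeat((189, 193)),   # -> byte_locs (placeholder)
            repeat((4, 4)),       # -> byte_lengths (placeholder)
            repeat("big"),        # -> segy_endian (placeholder)
            chunksize=2,          # batches tasks sent to each process
        )
        for line in lazy_work:
            print(line)
```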
