99import numpy as np
1010from segy .arrays import HeaderArray
1111
12- from mdio .api .io import _normalize_storage_options
1312from mdio .segy ._raw_trace_wrapper import SegyFileRawTraceWrapper
1413from mdio .segy .file import SegyFileArguments
1514from mdio .segy .file import SegyFileWrapper
1615
1716if TYPE_CHECKING :
18- from upath import UPath
1917 from zarr import Array as zarr_Array
18+ from zarr import Group as zarr_Group
2019
21- from zarr import open_group as zarr_open_group
2220from zarr .core .config import config as zarr_config
2321
2422from mdio .builder .schemas .v1 .stats import CenteredBinHistogram
@@ -75,7 +73,7 @@ def header_scan_worker(
7573
7674def trace_worker ( # noqa: PLR0913
7775 segy_file_kwargs : SegyFileArguments ,
78- output_path : UPath ,
76+ zarr_group : zarr_Group ,
7977 data_variable_name : str ,
8078 region : dict [str , slice ],
8179 grid_map : zarr_Array ,
@@ -84,9 +82,7 @@ def trace_worker( # noqa: PLR0913
8482
8583 Args:
8684 segy_file_kwargs: Arguments to open SegyFile instance.
87- output_path: Universal Path for the output Zarr dataset
88- (e.g. local file path or cloud storage URI) the location
89- also includes storage options for cloud storage.
85+ zarr_group: Zarr group to write to.
9086 data_variable_name: Name of the data variable to write.
9187 region: Region of the dataset to write to.
9288 grid_map: Zarr array mapping live traces to their positions in the dataset.
@@ -108,14 +104,11 @@ def trace_worker( # noqa: PLR0913
108104
109105 # Setting the zarr config to 1 thread to ensure we honor the `MDIO__IMPORT__MAX_WORKERS` environment variable.
110106 # The Zarr 3 engine utilizes multiple threads. This can lead to resource contention and unpredictable memory usage.
107+ # The setting stays inside the worker so that each worker uses at most 1 Zarr thread.
111108 zarr_config .set ({"threading.max_workers" : 1 })
112109
113110 live_trace_indexes = local_grid_map [not_null ].tolist ()
114111
115- # Open the zarr group to write directly
116- storage_options = _normalize_storage_options (output_path )
117- zarr_group = zarr_open_group (output_path .as_posix (), mode = "r+" , storage_options = storage_options )
118-
119112 header_key = "headers"
120113 raw_header_key = "raw_headers"
121114
0 commit comments