66from typing import TYPE_CHECKING
77
88import numpy as np
9- from segy import SegyFile
109from segy .arrays import HeaderArray
1110
1211from mdio .core .config import MDIOSettings
1514from mdio .segy .file import SegyFileWrapper
1615
1716if TYPE_CHECKING :
17+ from segy import SegyFile
1818 from zarr import Array as zarr_Array
1919
2020from zarr .core .config import config as zarr_config
2525
2626logger = logging .getLogger (__name__ )
2727
28- # Global variable to store opened segy file per worker process
29- _worker_segy_file = None
30-
31-
32- def _init_worker (segy_file_kwargs : SegyFileArguments ) -> None :
33- """Initialize worker process with persistent segy file handle.
34-
35- This function is called once per worker process to open the segy file,
36- which is then reused across all tasks in that worker.
37-
38- Args:
39- segy_file_kwargs: Arguments to open SegyFile instance.
40- """
41- global _worker_segy_file # noqa: PLW0603
42- # TODO(BrianMichell): Diagnose and fix handles not being cleaned up on cloud2cloud ingestions.
43- # https://github.com/TGSAI/mdio-python/pull/712
44- # https://github.com/TGSAI/mdio-python/pull/701
45- # The reason for having a global variable is to reduce the number of GET requests for opening the file.
46-
47- # Open the SEG-Y file once per worker
48- _worker_segy_file = SegyFile (** segy_file_kwargs )
49-
5028
5129def header_scan_worker (
5230 segy_file_kwargs : SegyFileArguments ,
@@ -91,6 +69,7 @@ def header_scan_worker(
9169
9270
9371def trace_worker ( # noqa: PLR0913
72+ segy_file : SegyFile ,
9473 data_array : zarr_Array ,
9574 header_array : zarr_Array | None ,
9675 raw_header_array : zarr_Array | None ,
@@ -99,9 +78,8 @@ def trace_worker( # noqa: PLR0913
9978) -> SummaryStatistics | None :
10079 """Writes a subset of traces from a region of the dataset of Zarr file.
10180
102- Uses pre-opened segy file from _init_worker and receives zarr arrays directly.
103-
10481 Args:
82+ segy_file: The opened SEG-Y file.
10583 data_array: Zarr array for writing trace data.
10684 header_array: Zarr array for writing trace headers (or None if not needed).
10785 raw_header_array: Zarr array for writing raw headers (or None if not needed).
@@ -111,11 +89,6 @@ def trace_worker( # noqa: PLR0913
11189 Returns:
11290 SummaryStatistics object containing statistics about the written traces.
11391 """
114- global _worker_segy_file # noqa: PLW0602
115-
116- # Use the pre-opened segy file from worker initialization
117- segy_file = _worker_segy_file
118-
11992 # Setting the zarr config to 1 thread to ensure we honor the `MDIO__IMPORT__CPU_COUNT` environment variable.
12093 # The Zarr 3 engine utilizes multiple threads. This can lead to resource contention and unpredictable memory usage.
12194 zarr_config .set ({"threading.max_workers" : 1 })
0 commit comments