1010from typing import Optional
1111from typing import Union
1212
13- import psutil
14-
1513_config_values : dict = {}
1614
1715# we need a preliminary version of this variable
@@ -30,8 +28,15 @@ def memory_allocation_calculation(allocation: Union[float, int]) -> int:
3028 Returns:
3129 int: Memory size in bytes to be allocated.
3230 """
33- total_memory = psutil .virtual_memory ().total # Convert bytes to megabytes
31+ # Import psutil lazily to avoid paying the import cost at module import time.
32+ # Use a small helper so tests or callers that need the value will trigger the
33+ # import only when this function is called.
34+ def _get_total_memory_bytes () -> int :
35+ import psutil
36+
37+ return psutil .virtual_memory ().total
3438
39+ total_memory = _get_total_memory_bytes ()
3540 if 0 < allocation < 1 : # Treat as a percentage
3641 return int (total_memory * allocation )
3742 elif allocation >= 1 : # Treat as an absolute value in MB
@@ -44,9 +49,13 @@ def system_gigabytes() -> int:
4449 """
4550 Get the total system memory in gigabytes.
4651
52+ This imports psutil lazily to avoid paying the cost at module import time.
53+
4754 Returns:
4855 int: Total system memory in gigabytes.
4956 """
57+ import psutil
58+
5059 return psutil .virtual_memory ().total // (1024 * 1024 * 1024 )
5160
5261
@@ -163,20 +172,67 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
163172MAX_CONSECUTIVE_CACHE_FAILURES : int = int (get ("MAX_CONSECUTIVE_CACHE_FAILURES" , 10 ))
164173"""Maximum number of consecutive cache failures before disabling cache usage."""
165174
166- MAX_LOCAL_BUFFER_CAPACITY : int = memory_allocation_calculation (float (get ("MAX_LOCAL_BUFFER_CAPACITY" , 0.2 )))
167- """Local buffer pool size in either bytes or fraction of system memory."""
175+ # These values are computed lazily via __getattr__ to avoid importing
176+ # psutil (and making expensive system calls) during module import.
177+ # Annotate the names so type checkers know about them, but do not assign
178+ # values here — __getattr__ will compute and cache them on first access.
179+ MAX_LOCAL_BUFFER_CAPACITY : int
180+ """Local buffer pool size in either bytes or fraction of system memory (lazy)."""
168181
169- MAX_READ_BUFFER_CAPACITY : int = memory_allocation_calculation ( float ( get ( "MAX_READ_BUFFER_CAPACITY" , 0.1 )))
170- """Read buffer pool size in either bytes or fraction of system memory."""
182+ MAX_READ_BUFFER_CAPACITY : int
183+ """Read buffer pool size in either bytes or fraction of system memory (lazy) ."""
171184
172185MAX_STATISTICS_CACHE_ITEMS : int = get ("MAX_STATISTICS_CACHE_ITEMS" , 10_000 )
173186"""The number of .parquet files we cache the statistics for."""
174187
175- CONCURRENT_READS : int = int (get ("CONCURRENT_READS" , max (system_gigabytes (), 2 )))
176- """Number of read workers per data source."""
188+ _LAZY_VALUES : dict = {}
189+
190+
191+ # Lazily computed configuration values. We compute certain values on first
192+ # access because they depend on expensive system calls (psutil) or other
193+ # runtime properties. Access these as attributes on the module; __getattr__
194+ # will compute and cache them.
195+
196+ CONCURRENT_WORKERS_DEFAULT = int (get ("CONCURRENT_WORKERS" , 2 ))
197+
198+
def _compute_MAX_LOCAL_BUFFER_CAPACITY() -> int:
    """Work out the local buffer pool size on demand.

    Reads ``MAX_LOCAL_BUFFER_CAPACITY`` through ``get`` (passing ``0.2`` as
    the default), coerces the result to ``float`` and hands it to
    ``memory_allocation_calculation``.

    Returns:
        int: Whatever ``memory_allocation_calculation`` returns for the
        requested allocation.
    """
    configured = get("MAX_LOCAL_BUFFER_CAPACITY", 0.2)
    requested_allocation = float(configured)
    return memory_allocation_calculation(requested_allocation)
201+
202+
def _compute_MAX_READ_BUFFER_CAPACITY() -> int:
    """Work out the read buffer pool size on demand.

    Reads ``MAX_READ_BUFFER_CAPACITY`` through ``get`` (passing ``0.1`` as
    the default), coerces the result to ``float`` and hands it to
    ``memory_allocation_calculation``.

    Returns:
        int: Whatever ``memory_allocation_calculation`` returns for the
        requested allocation.
    """
    configured = get("MAX_READ_BUFFER_CAPACITY", 0.1)
    requested_allocation = float(configured)
    return memory_allocation_calculation(requested_allocation)
205+
206+
def _compute_CONCURRENT_READS() -> int:
    """Work out the ``CONCURRENT_READS`` setting on demand.

    The fallback is ``max(system_gigabytes(), 2)``; the value obtained from
    ``get`` is coerced to ``int`` before being returned.

    Returns:
        int: The resolved ``CONCURRENT_READS`` value.
    """
    # NOTE: the fallback is an ordinary call argument, so it is evaluated
    # (and system_gigabytes() is called) before get() runs, regardless of
    # whether CONCURRENT_READS has been configured.
    fallback = max(system_gigabytes(), 2)
    configured = get("CONCURRENT_READS", fallback)
    return int(configured)
210+
177211
178- CONCURRENT_WORKERS : int = int (get ("CONCURRENT_WORKERS" , 2 ))
179- """Number of worker threads created to execute queries."""
def __getattr__(name: str):
    """Resolve lazily computed configuration values (module-level hook).

    Python invokes a module-level ``__getattr__`` only when the requested
    name is not found through normal module attribute lookup (PEP 562), so
    this runs for the names that are deliberately left unassigned above.

    Args:
        name: The attribute being looked up on this module.

    Returns:
        The value for ``name``. Values produced by a ``_compute_*`` function
        are stored in ``_LAZY_VALUES`` and reused on later lookups;
        ``CONCURRENT_WORKERS`` is served directly from
        ``CONCURRENT_WORKERS_DEFAULT``.

    Raises:
        AttributeError: If ``name`` is not one of the lazily provided values.
    """
    if name == "CONCURRENT_WORKERS":
        # Simple default, no expensive computation, so nothing to cache.
        return CONCURRENT_WORKERS_DEFAULT

    # One table instead of a copy-pasted branch per name. It is built at call
    # time so the compute functions are only referenced when actually needed.
    factories = {
        "MAX_LOCAL_BUFFER_CAPACITY": _compute_MAX_LOCAL_BUFFER_CAPACITY,
        "MAX_READ_BUFFER_CAPACITY": _compute_MAX_READ_BUFFER_CAPACITY,
        "CONCURRENT_READS": _compute_CONCURRENT_READS,
    }
    factory = factories.get(name)
    if factory is None:
        # Mirror the interpreter's own wording so tracebacks identify the
        # module as well as the missing attribute.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    cached = _LAZY_VALUES.get(name)
    if cached is None:
        cached = factory()
        _LAZY_VALUES[name] = cached
    return cached
180236
181237DATA_CATALOG_PROVIDER : str = get ("DATA_CATALOG_PROVIDER" )
182238"""Data Catalog provider."""
@@ -197,6 +253,7 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
197253# don't output resource (memory) utilization information
198254ENABLE_RESOURCE_LOGGING : bool = bool (get ("ENABLE_RESOURCE_LOGGING" , False ))
199255# size of morsels to push between steps
256+ # MORSEL_SIZE remains a plain constant
200257MORSEL_SIZE : int = int (get ("MORSEL_SIZE" , 64 * 1024 * 1024 ))
201258# not GA
202259PROFILE_LOCATION :str = get ("PROFILE_LOCATION" )
0 commit comments