1010from  typing  import  Optional 
1111from  typing  import  Union 
1212
13- import  psutil 
14- 
1513_config_values : dict  =  {}
1614
1715# we need a preliminary version of this variable 
@@ -30,8 +28,16 @@ def memory_allocation_calculation(allocation: Union[float, int]) -> int:
3028    Returns: 
3129        int: Memory size in bytes to be allocated. 
3230    """ 
33-     total_memory  =  psutil .virtual_memory ().total   # Convert bytes to megabytes 
3431
32+     # Import psutil lazily to avoid paying the import cost at module import time. 
33+     # Use a small helper so tests or callers that need the value will trigger the 
34+     # import only when this function is called. 
35+     def  _get_total_memory_bytes () ->  int :
36+         import  psutil 
37+ 
38+         return  psutil .virtual_memory ().total 
39+ 
40+     total_memory  =  _get_total_memory_bytes ()
3541    if  0  <  allocation  <  1 :  # Treat as a percentage 
3642        return  int (total_memory  *  allocation )
3743    elif  allocation  >=  1 :  # Treat as an absolute value in MB 
@@ -44,9 +50,13 @@ def system_gigabytes() -> int:
4450    """ 
4551    Get the total system memory in gigabytes. 
4652
53+     This imports psutil lazily to avoid paying the cost at module import time. 
54+ 
4755    Returns: 
4856        int: Total system memory in gigabytes. 
4957    """ 
58+     import  psutil 
59+ 
5060    return  psutil .virtual_memory ().total  //  (1024  *  1024  *  1024 )
5161
5262
@@ -163,20 +173,67 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
163173MAX_CONSECUTIVE_CACHE_FAILURES : int  =  int (get ("MAX_CONSECUTIVE_CACHE_FAILURES" , 10 ))
164174"""Maximum number of consecutive cache failures before disabling cache usage.""" 
165175
166- MAX_LOCAL_BUFFER_CAPACITY : int  =  memory_allocation_calculation (float (get ("MAX_LOCAL_BUFFER_CAPACITY" , 0.2 )))
167- """Local buffer pool size in either bytes or fraction of system memory.""" 
176+ # These values are computed lazily via __getattr__ to avoid importing 
177+ # psutil (and making expensive system calls) during module import. 
178+ # Annotate the names so type checkers know about them, but do not assign 
179+ # values here — __getattr__ will compute and cache them on first access. 
180+ MAX_LOCAL_BUFFER_CAPACITY : int 
181+ """Local buffer pool size in either bytes or fraction of system memory (lazy).""" 
168182
169- MAX_READ_BUFFER_CAPACITY : int   =   memory_allocation_calculation ( float ( get ( "MAX_READ_BUFFER_CAPACITY" ,  0.1 ))) 
170- """Read buffer pool size in either bytes or fraction of system memory.""" 
183+ MAX_READ_BUFFER_CAPACITY : int 
184+ """Read buffer pool size in either bytes or fraction of system memory (lazy) .""" 
171185
172186MAX_STATISTICS_CACHE_ITEMS : int  =  get ("MAX_STATISTICS_CACHE_ITEMS" , 10_000 )
173187"""The number of .parquet files we cache the statistics for.""" 
174188
175- CONCURRENT_READS : int  =  int (get ("CONCURRENT_READS" , max (system_gigabytes (), 2 )))
176- """Number of read workers per data source.""" 
189+ _LAZY_VALUES : dict  =  {}
190+ 
191+ 
192+ # Lazily computed configuration values. We compute certain values on first 
193+ # access because they depend on expensive system calls (psutil) or other 
194+ # runtime properties. Access these as attributes on the module; __getattr__ 
195+ # will compute and cache them. 
196+ 
197+ CONCURRENT_WORKERS_DEFAULT  =  int (get ("CONCURRENT_WORKERS" , 2 ))
198+ 
199+ 
def _compute_MAX_LOCAL_BUFFER_CAPACITY():
    """Resolve the local buffer pool capacity from configuration.

    Reads the ``MAX_LOCAL_BUFFER_CAPACITY`` setting (falling back to ``0.2``
    when it is not configured), coerces it to a float and passes it to
    ``memory_allocation_calculation``, whose result is returned unchanged.
    """
    configured = get("MAX_LOCAL_BUFFER_CAPACITY", 0.2)
    allocation = float(configured)
    return memory_allocation_calculation(allocation)
202+ 
203+ 
def _compute_MAX_READ_BUFFER_CAPACITY():
    """Resolve the read buffer pool capacity from configuration.

    Reads the ``MAX_READ_BUFFER_CAPACITY`` setting (falling back to ``0.1``
    when it is not configured), coerces it to a float and passes it to
    ``memory_allocation_calculation``, whose result is returned unchanged.
    """
    configured = get("MAX_READ_BUFFER_CAPACITY", 0.1)
    allocation = float(configured)
    return memory_allocation_calculation(allocation)
206+ 
207+ 
def _compute_CONCURRENT_READS():
    """Resolve the ``CONCURRENT_READS`` setting as an integer.

    The fallback is the larger of ``system_gigabytes()`` and 2. It is
    evaluated before the configuration lookup, so ``system_gigabytes()`` is
    called whether or not the setting has been configured.
    """
    fallback = max(system_gigabytes(), 2)
    configured = get("CONCURRENT_READS", fallback)
    return int(configured)
211+ 
177212
178- CONCURRENT_WORKERS : int  =  int (get ("CONCURRENT_WORKERS" , 2 ))
179- """Number of worker threads created to execute queries.""" 
def __getattr__(name: str):
    """Resolve lazily computed configuration values (PEP 562 module hook).

    Python calls this only when ``name`` is not found through normal module
    attribute lookup. ``MAX_LOCAL_BUFFER_CAPACITY``,
    ``MAX_READ_BUFFER_CAPACITY`` and ``CONCURRENT_READS`` are computed on
    first access by their ``_compute_*`` helper and then memoised in
    ``_LAZY_VALUES``. ``CONCURRENT_WORKERS`` is served straight from
    ``CONCURRENT_WORKERS_DEFAULT``.

    Args:
        name: The module attribute being looked up.

    Returns:
        The value of the requested setting.

    Raises:
        AttributeError: If ``name`` is not one of the settings handled here.
    """
    if name == "CONCURRENT_WORKERS":
        # Cheap to obtain, so it is neither computed lazily nor cached.
        return CONCURRENT_WORKERS_DEFAULT

    lazy_names = (
        "MAX_LOCAL_BUFFER_CAPACITY",
        "MAX_READ_BUFFER_CAPACITY",
        "CONCURRENT_READS",
    )
    if name not in lazy_names:
        # Use the interpreter's standard wording so tracebacks and
        # hasattr()/getattr() callers see a familiar message.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Test membership rather than comparing against None, so the cache does
    # not depend on a sentinel value.
    if name not in _LAZY_VALUES:
        factories = {
            "MAX_LOCAL_BUFFER_CAPACITY": _compute_MAX_LOCAL_BUFFER_CAPACITY,
            "MAX_READ_BUFFER_CAPACITY": _compute_MAX_READ_BUFFER_CAPACITY,
            "CONCURRENT_READS": _compute_CONCURRENT_READS,
        }
        _LAZY_VALUES[name] = factories[name]()
    return _LAZY_VALUES[name]
180237
181238DATA_CATALOG_PROVIDER : str  =  get ("DATA_CATALOG_PROVIDER" )
182239"""Data Catalog provider.""" 
@@ -197,6 +254,7 @@ def get(key: str, default: Optional[typing.Any] = None) -> Optional[typing.Any]:
197254# don't output resource (memory) utilization information 
198255ENABLE_RESOURCE_LOGGING : bool  =  bool (get ("ENABLE_RESOURCE_LOGGING" , False ))
199256# size of morsels to push between steps 
257+ # MORSEL_SIZE remains a plain constant 
200258MORSEL_SIZE : int  =  int (get ("MORSEL_SIZE" , 64  *  1024  *  1024 ))
201259# not GA 
202260PROFILE_LOCATION :str  =  get ("PROFILE_LOCATION" )
0 commit comments