99import ray
1010
1111from consts .const import (
12- RAY_NUM_CPUS ,
1312 RAY_OBJECT_STORE_MEMORY_GB ,
14- RAY_PLASMA_DIRECTORY ,
1513 RAY_TEMP_DIR ,
14+ RAY_preallocate_plasma ,
1615)
1716
1817logger = logging .getLogger ("data_process.ray_config" )
@@ -25,9 +24,9 @@ class RayConfig:
2524 """Ray configuration manager"""
2625
2726 def __init__ (self ):
28- self .plasma_directory = RAY_PLASMA_DIRECTORY
2927 self .object_store_memory_gb = RAY_OBJECT_STORE_MEMORY_GB
3028 self .temp_dir = RAY_TEMP_DIR
29+ self .preallocate_plasma = RAY_preallocate_plasma
3130
3231 def get_init_params (
3332 self ,
@@ -52,7 +51,6 @@ def get_init_params(
5251 """
5352 params = {
5453 "ignore_reinit_error" : True ,
55- "_plasma_directory" : self .plasma_directory ,
5654 }
5755
5856 if address :
@@ -70,9 +68,16 @@ def get_init_params(
7068 # Temp directory configuration
7169 params ["_temp_dir" ] = self .temp_dir
7270
71+ # Object spilling directory (stable API)
72+ # This allows Ray to spill objects to disk when memory is full
73+ params ["object_spilling_directory" ] = self .temp_dir
74+
7375 # Dashboard configuration
76+ # Always pass include_dashboard explicitly because Ray's default is True.
77+ # If we omit this parameter when include_dashboard is False,
78+ # Ray will still start the dashboard by default.
79+ params ["include_dashboard" ] = include_dashboard
7480 if include_dashboard :
75- params ["include_dashboard" ] = True
7681 params ["dashboard_host" ] = dashboard_host
7782 params ["dashboard_port" ] = dashboard_port
7883
@@ -93,30 +98,49 @@ def init_ray(self, **kwargs) -> bool:
9398 logger .info ("Ray already initialized, skipping..." )
9499 return True
95100
101+ # Set RAY_preallocate_plasma environment variable before initialization
102+ # Ray reads this environment variable during initialization
103+ os .environ ["RAY_preallocate_plasma" ] = str (
104+ self .preallocate_plasma ).lower ()
105+
96106 params = self .get_init_params (** kwargs )
97107
98108 # Log the attempt to initialize
99- logger .debug ("Initializing Ray cluster..." )
100- logger .debug ("Ray configuration parameters:" )
109+ logger .info ("Initializing Ray cluster..." )
110+ logger .info ("Ray memory optimization configuration:" )
111+ logger .info (
112+ f" RAY_preallocate_plasma: { self .preallocate_plasma } " )
113+ logger .info (
114+ f" Object store memory: { self .object_store_memory_gb } GB" )
101115 for key , value in params .items ():
102116 if key .startswith ('_' ):
103117 logger .debug (f" { key } : { value } " )
104118 elif key == 'object_store_memory' :
105- logger .debug (f" { key } : { value / (1024 ** 3 ):.1f} GB" )
119+ logger .info (f" { key } : { value / (1024 ** 3 ):.2f} GB" )
120+ elif key == 'object_spilling_directory' :
121+ logger .info (f" { key } : { value } " )
106122 else :
107123 logger .debug (f" { key } : { value } " )
108124
109125 ray .init (** params )
110126 logger .info ("✅ Ray initialization successful" )
111127
112- # Display cluster information
128+ # Display cluster information and verify memory configuration
113129 try :
114130 if hasattr (ray , 'cluster_resources' ):
115131 resources = ray .cluster_resources ()
116- logger .debug (f"Ray cluster resources: { resources } " )
132+ logger .info (f"Ray cluster resources: { resources } " )
133+
134+ # Log memory-related resources
135+ if 'memory' in resources :
136+ logger .info (
137+ f" Total cluster memory: { resources ['memory' ] / (1024 ** 3 ):.2f} GB" )
138+ if 'object_store_memory' in resources :
139+ logger .info (
140+ f" Object store memory: { resources ['object_store_memory' ] / (1024 ** 3 ):.2f} GB" )
117141 except Exception as e :
118- logger .error (
119- f"Failed to get cluster resources information: { e } " )
142+ logger .warning (
143+ f"Could not retrieve cluster resources information: { e } " )
120144
121145 return True
122146
@@ -139,9 +163,17 @@ def connect_to_cluster(self, address: str = "auto") -> bool:
139163 logger .debug ("Ray already initialized, skipping..." )
140164 return True
141165
166+ # Set RAY_preallocate_plasma environment variable before initialization
167+ # Note: When connecting to existing cluster, this setting may not take effect
168+ # as the cluster was already initialized with its own settings
169+ os .environ ["RAY_preallocate_plasma" ] = str (
170+ self .preallocate_plasma ).lower ()
171+
142172 params = self .get_init_params (address = address )
143173
144174 logger .debug (f"Connecting to Ray cluster: { address } " )
175+ logger .debug (
176+ f" RAY_preallocate_plasma: { self .preallocate_plasma } " )
145177 ray .init (** params )
146178 logger .info ("✅ Successfully connected to Ray cluster" )
147179
@@ -183,6 +215,7 @@ def log_configuration(self):
183215 logger .debug (f" Plasma directory: { self .plasma_directory } " )
184216 logger .debug (f" ObjectStore memory: { self .object_store_memory_gb } GB" )
185217 logger .debug (f" Temp directory: { self .temp_dir } " )
218+ logger .debug (f" Preallocate plasma: { self .preallocate_plasma } " )
186219
187220 @classmethod
188221 def init_ray_for_worker (cls , address : str = "auto" ) -> bool :
0 commit comments