1313# limitations under the License.
1414
1515"""
16- The config sub-module contains the definition of the RayJobClusterConfigV2 dataclass,
16+ The config sub-module contains the definition of the RayJobClusterConfig dataclass,
1717which is used to specify resource requirements and other details when creating a
1818Cluster object.
1919"""
@@ -139,6 +139,16 @@ class RayJobClusterConfig:
139139 A list of V1Volume objects to add to the Cluster
140140 volume_mounts:
141141 A list of V1VolumeMount objects to add to the Cluster
142+ enable_gcs_ft:
143+ A boolean indicating whether to enable GCS fault tolerance.
144+ enable_usage_stats:
145+ A boolean indicating whether to capture and send Ray usage stats externally.
146+ redis_address:
147+ The address of the Redis server to use for GCS fault tolerance, required when enable_gcs_ft is True.
148+ redis_password_secret:
149+ Kubernetes secret reference containing Redis password. ex: {"name": "secret-name", "key": "password-key"}
150+ external_storage_namespace:
151+ The storage namespace to use for GCS fault tolerance. By default, KubeRay sets it to the UID of RayCluster.
142152 """
143153
144154 head_cpu_requests : Union [int , str ] = 2
@@ -165,8 +175,39 @@ class RayJobClusterConfig:
165175 annotations : Dict [str , str ] = field (default_factory = dict )
166176 volumes : list [V1Volume ] = field (default_factory = list )
167177 volume_mounts : list [V1VolumeMount ] = field (default_factory = list )
178+ enable_gcs_ft : bool = False
179+ enable_usage_stats : bool = False
180+ redis_address : Optional [str ] = None
181+ redis_password_secret : Optional [Dict [str , str ]] = None
182+ external_storage_namespace : Optional [str ] = None
168183
169184 def __post_init__ (self ):
185+ if self .enable_usage_stats :
186+ self .envs ["RAY_USAGE_STATS_ENABLED" ] = "1"
187+ else :
188+ self .envs ["RAY_USAGE_STATS_ENABLED" ] = "0"
189+
190+ if self .enable_gcs_ft :
191+ if not self .redis_address :
192+ raise ValueError (
193+ "redis_address must be provided when enable_gcs_ft is True"
194+ )
195+
196+ if self .redis_password_secret and not isinstance (
197+ self .redis_password_secret , dict
198+ ):
199+ raise ValueError (
200+ "redis_password_secret must be a dictionary with 'name' and 'key' fields"
201+ )
202+
203+ if self .redis_password_secret and (
204+ "name" not in self .redis_password_secret
205+ or "key" not in self .redis_password_secret
206+ ):
207+ raise ValueError (
208+ "redis_password_secret must contain both 'name' and 'key' fields"
209+ )
210+
170211 self ._validate_types ()
171212 self ._memory_to_string ()
172213 self ._validate_gpu_config (self .head_accelerators )
@@ -251,6 +292,11 @@ def build_ray_cluster_spec(self, cluster_name: str) -> Dict[str, Any]:
251292 "workerGroupSpecs" : [self ._build_worker_group_spec (cluster_name )],
252293 }
253294
295+ # Add GCS fault tolerance if enabled
296+ if self .enable_gcs_ft :
297+ gcs_ft_options = self ._build_gcs_ft_options ()
298+ ray_cluster_spec ["gcsFaultToleranceOptions" ] = gcs_ft_options
299+
254300 return ray_cluster_spec
255301
256302 def _build_head_group_spec (self ) -> Dict [str , Any ]:
@@ -453,3 +499,25 @@ def _generate_volumes(self) -> list:
453499 def _build_env_vars (self ) -> list :
454500 """Build environment variables list."""
455501 return [V1EnvVar (name = key , value = value ) for key , value in self .envs .items ()]
502+
503+ def _build_gcs_ft_options (self ) -> Dict [str , Any ]:
504+ """Build GCS fault tolerance options."""
505+ gcs_ft_options = {"redisAddress" : self .redis_address }
506+
507+ if (
508+ hasattr (self , "external_storage_namespace" )
509+ and self .external_storage_namespace
510+ ):
511+ gcs_ft_options ["externalStorageNamespace" ] = self .external_storage_namespace
512+
513+ if hasattr (self , "redis_password_secret" ) and self .redis_password_secret :
514+ gcs_ft_options ["redisPassword" ] = {
515+ "valueFrom" : {
516+ "secretKeyRef" : {
517+ "name" : self .redis_password_secret ["name" ],
518+ "key" : self .redis_password_secret ["key" ],
519+ }
520+ }
521+ }
522+
523+ return gcs_ft_options
0 commit comments