1+ """
2+ Defines the RayJobConfiguration dataclass for specifying KubeRay RayJob custom resources.
3+ """
4+
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Union

# NOTE(review): placeholder import -- confirm this module exists, or replace it
# with the real `kubernetes.client.models` import once that is resolved.
import corev1_client  # Placeholder for kubernetes.client.models.V1PodTemplateSpec

from codeflare_sdk.ray.cluster.config import ClusterConfiguration
10+
# Placeholder alias for the Kubernetes pod template type. The intended type is
# `kubernetes.client.models.V1PodTemplateSpec`; until that import is resolved,
# a generic string-keyed mapping stands in for it so that annotations
# referring to `V1PodTemplateSpec` can be evaluated.
V1PodTemplateSpec = Dict[str, Any]
15+
16+
@dataclass
class RayJobConfiguration:
    """
    Configuration for a KubeRay RayJob.

    All values are validated in ``__post_init__``, so an invalid value or an
    invalid combination of values raises ``ValueError`` at construction time.

    Args:
        name: Name of the RayJob.
        namespace: Namespace for the RayJob.
        entrypoint: Command to execute for the job. Required. It carries an
            empty-string default only because dataclasses do not allow a
            field without a default to follow one with a default
            (``namespace``); an empty value is rejected during validation.
        runtime_env_yaml: Runtime environment configuration as a YAML string.
        job_id: Optional ID for the job. Auto-generated if not set.
        active_deadline_seconds: Duration in seconds the job may be active.
        backoff_limit: Number of retries before marking job as failed.
        deletion_policy: Policy for resource deletion on job completion.
            Valid values: "DeleteCluster", "DeleteWorkers", "DeleteSelf", "DeleteNone".
        submission_mode: How the Ray job is submitted to the RayCluster.
            Valid values: "K8sJobMode", "HTTPMode", "InteractiveMode".
        managed_by: Controller managing the RayJob. Valid values:
            "ray.io/kuberay-operator", "kueue.x-k8s.io/multikueue".
        ray_cluster_spec: Specification for the RayCluster if created by this RayJob.
        cluster_selector: Labels to select an existing RayCluster.
        submitter_pod_template: Pod template for the job submitter (if K8sJobMode).
        shutdown_after_job_finishes: Whether to delete the RayCluster after job completion.
        ttl_seconds_after_finished: TTL for RayCluster cleanup after job completion.
        suspend: Whether to suspend the RayJob (prevents RayCluster creation).
        metadata: Metadata for the RayJob.
        submitter_config_backoff_limit: BackoffLimit for the submitter Kubernetes Job.

    Raises:
        ValueError: If ``entrypoint`` is empty; if ``deletion_policy``,
            ``submission_mode`` or ``managed_by`` is not one of its valid
            values; if both ``ray_cluster_spec`` and ``cluster_selector`` are
            given; or if neither is given and ``submission_mode`` is not
            "InteractiveMode".
    """

    # Allowed values for the enumerated string fields. These are deliberately
    # left unannotated so that @dataclass does not treat them as fields.
    _VALID_DELETION_POLICIES = (
        "DeleteCluster",
        "DeleteWorkers",
        "DeleteSelf",
        "DeleteNone",
    )
    _VALID_SUBMISSION_MODES = ("K8sJobMode", "HTTPMode", "InteractiveMode")
    _VALID_MANAGERS = ("ray.io/kuberay-operator", "kueue.x-k8s.io/multikueue")

    name: str
    namespace: Optional[str] = None
    entrypoint: str = ""  # Required; presence is enforced in __post_init__
    runtime_env_yaml: Optional[str] = None
    job_id: Optional[str] = None
    active_deadline_seconds: Optional[int] = None
    backoff_limit: int = 0  # KubeRay default is 0
    deletion_policy: Optional[str] = None
    submission_mode: str = "K8sJobMode"  # KubeRay default
    managed_by: Optional[str] = None
    ray_cluster_spec: Optional[ClusterConfiguration] = None
    cluster_selector: Dict[str, str] = field(default_factory=dict)
    submitter_pod_template: Optional[V1PodTemplateSpec] = None  # Kubernetes V1PodTemplateSpec
    # Common default, KubeRay itself doesn't default this in RayJobSpec directly
    shutdown_after_job_finishes: bool = True
    ttl_seconds_after_finished: int = 0  # KubeRay default
    suspend: bool = False
    metadata: Dict[str, str] = field(default_factory=dict)
    submitter_config_backoff_limit: Optional[int] = None

    def __post_init__(self):
        """Validate individual field values and their allowed combinations."""
        if not self.entrypoint:
            raise ValueError("entrypoint must be specified.")

        # Compare against None rather than relying on truthiness, so that an
        # empty string is reported as invalid instead of silently accepted.
        if (
            self.deletion_policy is not None
            and self.deletion_policy not in self._VALID_DELETION_POLICIES
        ):
            raise ValueError(
                "deletion_policy must be one of 'DeleteCluster', 'DeleteWorkers', "
                "'DeleteSelf', or 'DeleteNone'"
            )

        if self.submission_mode not in self._VALID_SUBMISSION_MODES:
            raise ValueError(
                "submission_mode must be one of 'K8sJobMode', 'HTTPMode', or 'InteractiveMode'"
            )

        if self.managed_by is not None and self.managed_by not in self._VALID_MANAGERS:
            raise ValueError(
                "managed_by field value must be either 'ray.io/kuberay-operator' "
                "or 'kueue.x-k8s.io/multikueue'"
            )

        has_cluster_spec = self.ray_cluster_spec is not None
        has_selector = bool(self.cluster_selector)

        if has_cluster_spec and has_selector:
            raise ValueError(
                "Only one of ray_cluster_spec or cluster_selector can be provided."
            )

        # In interactive mode, a cluster might already exist and the user
        # connects to it. Otherwise, a RayJob needs either a spec to create a
        # cluster or a selector to find one.
        if (
            not has_cluster_spec
            and not has_selector
            and self.submission_mode != "InteractiveMode"
        ):
            raise ValueError(
                "Either ray_cluster_spec (to create a new cluster) or cluster_selector "
                "(to use an existing one) must be specified unless in InteractiveMode."
            )

        # TODO: Add validation for submitter_pod_template if submission_mode is K8sJobMode
        # TODO: Add type validation for all fields
0 commit comments