Skip to content

Commit cfca96a

Browse files
laurafitzgeraldkryanbeane
authored and committed
define a ray job class
1 parent f9f8671 commit cfca96a

File tree

1 file changed

+116
-0
lines changed
  • src/codeflare_sdk/ray/job

1 file changed

+116
-0
lines changed

src/codeflare_sdk/ray/job/job.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
from dataclasses import dataclass, field
2+
from typing import Dict, Any, Optional, Union, List
3+
from enum import Enum
4+
5+
class RayJobStatus(str, Enum):
    """Lifecycle states a RayJob can be in.

    The ``str`` mixin makes every member compare equal to its plain string
    value, e.g. ``RayJobStatus.PENDING == "PENDING"``.
    """

    # Declaration order is significant: it is the iteration order of the enum.
    PENDING = "PENDING"
    RUNNING = "RUNNING"
    STOPPED = "STOPPED"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
12+
13+
@dataclass
class RayJobSpec:
    """Specification for a RayJob Custom Resource.

    ``entrypoint`` is the only required field; every other field is optional
    and defaults to ``None``, except ``status`` which defaults to
    ``RayJobStatus.PENDING``.
    """

    # ---- Required fields -------------------------------------------------

    # The command to execute for this job (e.g., "python script.py").
    entrypoint: str

    # ---- Optional fields -------------------------------------------------

    # Unique ID for the job submission. If not provided, one will be generated.
    submission_id: Optional[str] = None

    # Runtime environment configuration for the job, including:
    #   - working_dir: Directory containing files that your job will run in
    #   - pip: List of pip packages to install
    #   - conda: Conda environment specification
    #   - env_vars: Environment variables to set
    #   - py_modules: Python modules to include
    runtime_env: Optional[Dict[str, Any]] = None

    # Arbitrary metadata to store with the job.
    metadata: Optional[Dict[str, str]] = None

    # Number of CPU cores to reserve for the entrypoint command.
    entrypoint_num_cpus: Optional[Union[int, float]] = None

    # Number of GPUs to reserve for the entrypoint command.
    entrypoint_num_gpus: Optional[Union[int, float]] = None

    # Amount of memory to reserve for the entrypoint command.
    entrypoint_memory: Optional[int] = None

    # Custom resources to reserve for the entrypoint command.
    entrypoint_resources: Optional[Dict[str, float]] = None

    # Name of the RayCluster to run this job on.
    cluster_name: Optional[str] = None

    # Namespace of the RayCluster to run this job on.
    cluster_namespace: Optional[str] = None

    # ---- Status fields (managed by the controller) -----------------------

    # Current status of the job.
    status: RayJobStatus = RayJobStatus.PENDING

    # Detailed status message.
    message: Optional[str] = None

    # Time when the job started.
    start_time: Optional[str] = None

    # Time when the job ended.
    end_time: Optional[str] = None

    # Information about the job driver, including:
    #   - id: Driver ID
    #   - node_ip_address: IP address of the node running the driver
    #   - pid: Process ID of the driver
    driver_info: Optional[Dict[str, str]] = None
74+
75+
@dataclass
class RayJob:
    """RayJob custom resource: Kubernetes metadata plus a job specification.

    The required fields (``metadata`` and ``spec``) are declared before the
    defaulted ones. ``@dataclass`` raises ``TypeError`` at class-creation
    time when a field without a default follows a field with one, so the
    defaulted ``api_version`` / ``kind`` cannot come first.
    """

    # Kubernetes metadata for the job.
    metadata: Dict[str, Any]

    # Job specification (a RayJobSpec instance).
    spec: "RayJobSpec"

    # apiVersion / kind emitted by to_dict().
    api_version: str = "ray.io/v1"
    kind: str = "RayJob"

    # Status of the job (managed by the controller). When left as None,
    # to_dict() builds the status section from the status fields on ``spec``.
    status: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the RayJob to a dictionary suitable for Kubernetes API.

        Returns:
            A dict with the keys ``apiVersion``, ``kind``, ``metadata``,
            ``spec`` and ``status``. ``status`` is ``self.status`` when that
            is not ``None`` (an empty dict is passed through unchanged);
            otherwise it is assembled from the status fields of ``self.spec``.
        """
        spec = self.spec

        if self.status is not None:
            status_section = self.status
        else:
            job_state = spec.status
            # Emit the plain string instead of the Enum member so the result
            # contains only primitive types; the str-mixin member compares
            # equal to this value, so consumers see the same content.
            if isinstance(job_state, Enum):
                job_state = job_state.value
            status_section = {
                "status": job_state,
                "message": spec.message,
                "start_time": spec.start_time,
                "end_time": spec.end_time,
                "driver_info": spec.driver_info,
            }

        return {
            "apiVersion": self.api_version,
            "kind": self.kind,
            "metadata": self.metadata,
            "spec": {
                "entrypoint": spec.entrypoint,
                "submission_id": spec.submission_id,
                "runtime_env": spec.runtime_env,
                "metadata": spec.metadata,
                "entrypoint_num_cpus": spec.entrypoint_num_cpus,
                "entrypoint_num_gpus": spec.entrypoint_num_gpus,
                "entrypoint_memory": spec.entrypoint_memory,
                "entrypoint_resources": spec.entrypoint_resources,
                "cluster_name": spec.cluster_name,
                "cluster_namespace": spec.cluster_namespace,
            },
            "status": status_section,
        }

0 commit comments

Comments
 (0)