|
| 1 | +from dataclasses import dataclass, field |
| 2 | +from typing import Dict, Any, Optional, Union, List |
| 3 | +from enum import Enum |
| 4 | + |
class RayJobStatus(str, Enum):
    """Enumerate the states a RayJob can be in.

    The class mixes in ``str``, so each member compares equal to its own
    upper-case name (``RayJobStatus.RUNNING == "RUNNING"``).
    """

    # Each member's value is identical to its name.
    PENDING = "PENDING"
    RUNNING = "RUNNING"
    STOPPED = "STOPPED"
    SUCCEEDED = "SUCCEEDED"
    FAILED = "FAILED"
| 12 | + |
@dataclass
class RayJobSpec:
    """Describe a RayJob custom resource: what to run, where, and its state.

    Only ``entrypoint`` is required; every other field carries a default.

    Attributes:
        entrypoint: Command to execute for this job, e.g.
            ``"python script.py"``.
        submission_id: Unique ID for the job submission. When left as
            ``None`` an ID is expected to be generated elsewhere; this
            class does not create one.
        runtime_env: Runtime environment configuration for the job.
            Documented keys are ``working_dir`` (directory containing the
            files the job runs in), ``pip`` (list of pip packages to
            install), ``conda`` (conda environment specification),
            ``env_vars`` (environment variables to set) and ``py_modules``
            (Python modules to include).
        metadata: Arbitrary metadata to store with the job.
        entrypoint_num_cpus: Number of CPU cores to reserve for the
            entrypoint command.
        entrypoint_num_gpus: Number of GPUs to reserve for the entrypoint
            command.
        entrypoint_memory: Amount of memory to reserve for the entrypoint
            command.
        entrypoint_resources: Custom resources to reserve for the
            entrypoint command.
        cluster_name: Name of the RayCluster to run this job on.
        cluster_namespace: Namespace of the RayCluster to run this job on.
        status: Current status of the job; starts as ``PENDING``.
        message: Detailed status message.
        start_time: Time when the job started.
        end_time: Time when the job ended.
        driver_info: Information about the job driver. Documented keys are
            ``id`` (driver ID), ``node_ip_address`` (IP address of the
            node running the driver) and ``pid`` (process ID of the
            driver).
    """

    # --- Required -------------------------------------------------------
    entrypoint: str

    # --- Optional submission settings -----------------------------------
    submission_id: Optional[str] = None
    runtime_env: Optional[Dict[str, Any]] = None
    metadata: Optional[Dict[str, str]] = None

    # --- Resources reserved for the entrypoint command ------------------
    entrypoint_num_cpus: Optional[Union[int, float]] = None
    entrypoint_num_gpus: Optional[Union[int, float]] = None
    entrypoint_memory: Optional[int] = None
    entrypoint_resources: Optional[Dict[str, float]] = None

    # --- Target cluster -------------------------------------------------
    cluster_name: Optional[str] = None
    cluster_namespace: Optional[str] = None

    # --- Status fields (managed by the controller) ----------------------
    status: RayJobStatus = RayJobStatus.PENDING
    message: Optional[str] = None
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    driver_info: Optional[Dict[str, str]] = None
| 74 | + |
@dataclass
class RayJob:
    """RayJob custom resource: a Kubernetes envelope around a job spec.

    The fields without defaults (``metadata`` and ``spec``) are declared
    before the defaulted ones. ``@dataclass`` raises ``TypeError`` at
    class-creation time when a field without a default follows a field
    with one, so declaring ``api_version``/``kind`` first made the class
    impossible to define. Keyword construction
    (``RayJob(metadata=..., spec=...)``) is unaffected by the order.

    Attributes:
        metadata: Kubernetes metadata for the job.
        spec: Job specification.
        api_version: Value emitted under the ``apiVersion`` key.
        kind: Value emitted under the ``kind`` key.
        status: Status of the job (managed by the controller). When this
            is ``None``, ``to_dict`` builds the status section from the
            status fields stored on ``spec`` instead.
    """

    # Required fields first (see class docstring).
    metadata: Dict[str, Any]
    # Quoted forward reference: the annotation is not evaluated when the
    # class body runs, so this class does not depend on definition order.
    spec: "RayJobSpec"

    api_version: str = "ray.io/v1"
    kind: str = "RayJob"
    status: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert the RayJob to a dictionary suitable for the Kubernetes API.

        Returns:
            A dict with the keys ``apiVersion``, ``kind``, ``metadata``,
            ``spec`` and ``status``. ``status`` is ``self.status`` when it
            is set; otherwise it is assembled from the status fields on
            ``self.spec``, with the job status emitted as a plain string.
        """
        spec = self.spec

        if self.status is not None:
            # An explicitly supplied status dict wins over the spec fields.
            status_section = self.status
        else:
            raw_status = spec.status
            status_section = {
                # Emit the enum's underlying value (or the value as-is when
                # it is already a plain string) so the payload contains
                # only builtin types.
                "status": getattr(raw_status, "value", raw_status),
                "message": spec.message,
                "start_time": spec.start_time,
                "end_time": spec.end_time,
                "driver_info": spec.driver_info,
            }

        return {
            "apiVersion": self.api_version,
            "kind": self.kind,
            "metadata": self.metadata,
            "spec": {
                "entrypoint": spec.entrypoint,
                "submission_id": spec.submission_id,
                "runtime_env": spec.runtime_env,
                "metadata": spec.metadata,
                "entrypoint_num_cpus": spec.entrypoint_num_cpus,
                "entrypoint_num_gpus": spec.entrypoint_num_gpus,
                "entrypoint_memory": spec.entrypoint_memory,
                "entrypoint_resources": spec.entrypoint_resources,
                "cluster_name": spec.cluster_name,
                "cluster_namespace": spec.cluster_namespace,
            },
            "status": status_section,
        }
0 commit comments