@@ -50,6 +50,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
5050cluster setup queue, a list of all existing clusters, and the user's working namespace.
5151"""
5252
53+ import re
5354from time import sleep
5455from typing import List, Optional, Tuple, Dict
5556
@@ -73,11 +74,13 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
7374 RayClusterStatus,
7475)
7576from kubernetes import client, config
77+ from kubernetes.utils import parse_quantity
7678import yaml
7779import os
7880import requests
7981
8082from kubernetes import config
83+ from kubernetes.client.rest import ApiException
8184
8285
8386class Cluster:
@@ -216,6 +219,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
216219 write_to_file = self.config.write_to_file
217220 verify_tls = self.config.verify_tls
218221 local_queue = self.config.local_queue
222+ labels = self.config.labels
219223 return generate_appwrapper(
220224 name=name,
221225 namespace=namespace,
@@ -240,6 +244,7 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
240244 write_to_file=write_to_file,
241245 verify_tls=verify_tls,
242246 local_queue=local_queue,
247+ labels=labels,
243248 )
244249
245250 # creates a new cluster with the provided or default spec
@@ -248,6 +253,10 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
248253 Applies the AppWrapper yaml, pushing the resource request onto
249254 the MCAD queue.
250255 """
256+
257+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
258+ self._throw_for_no_raycluster()
259+
251260 namespace = self.config.namespace
252261
253262 try:
@@ -278,12 +287,32 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
278287 except Exception as e: # pragma: no cover
279288 return _kube_api_error_handling(e)
280289
290+ def _throw_for_no_raycluster(self):
291+ api_instance = client.CustomObjectsApi(api_config_handler())
292+ try:
293+ api_instance.list_namespaced_custom_object(
294+ group="ray.io",
295+ version="v1",
296+ namespace=self.config.namespace,
297+ plural="rayclusters",
298+ )
299+ except ApiException as e:
300+ if e.status == 404:
301+ raise RuntimeError(
302+ "RayCluster CustomResourceDefinition unavailable contact your administrator."
303+ )
304+ else:
305+ raise RuntimeError(
306+ "Failed to get RayCluster CustomResourceDefinition: " + str(e)
307+ )
308+
281309 def down(self):
282310 """
283311 Deletes the AppWrapper yaml, scaling-down and deleting all resources
284312 associated with the cluster.
285313 """
286314 namespace = self.config.namespace
315+ self._throw_for_no_raycluster()
287316 try:
288317 config_check()
289318 api_instance = client.CustomObjectsApi(api_config_handler())
@@ -520,26 +549,18 @@ <h1 class="title">Module <code>codeflare_sdk.cluster.cluster</code></h1>
520549 namespace=rc["metadata"]["namespace"],
521550 machine_types=machine_types,
522551 num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
523- min_cpus=int(
524- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
525- "resources"
526- ]["requests"]["cpu"]
527- ),
528- max_cpus=int(
529- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
530- "resources"
531- ]["limits"]["cpu"]
532- ),
533- min_memory=int(
534- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
535- "resources"
536- ]["requests"]["memory"][:-1]
537- ),
538- max_memory=int(
539- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
540- "resources"
541- ]["limits"]["memory"][:-1]
542- ),
552+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
553+ "containers"
554+ ][0]["resources"]["requests"]["cpu"],
555+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
556+ "containers"
557+ ][0]["resources"]["limits"]["cpu"],
558+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
559+ "containers"
560+ ][0]["resources"]["requests"]["memory"],
561+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
562+ "containers"
563+ ][0]["resources"]["limits"]["memory"],
543564 num_gpus=int(
544565 rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
545566 "resources"
@@ -1265,6 +1286,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
12651286 write_to_file = self.config.write_to_file
12661287 verify_tls = self.config.verify_tls
12671288 local_queue = self.config.local_queue
1289+ labels = self.config.labels
12681290 return generate_appwrapper(
12691291 name=name,
12701292 namespace=namespace,
@@ -1289,6 +1311,7 @@ <h2 class="section-title" id="header-classes">Classes</h2>
12891311 write_to_file=write_to_file,
12901312 verify_tls=verify_tls,
12911313 local_queue=local_queue,
1314+ labels=labels,
12921315 )
12931316
12941317 # creates a new cluster with the provided or default spec
@@ -1297,6 +1320,10 @@ <h2 class="section-title" id="header-classes">Classes</h2>
12971320 Applies the AppWrapper yaml, pushing the resource request onto
12981321 the MCAD queue.
12991322 """
1323+
1324+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
1325+ self._throw_for_no_raycluster()
1326+
13001327 namespace = self.config.namespace
13011328
13021329 try:
@@ -1327,12 +1354,32 @@ <h2 class="section-title" id="header-classes">Classes</h2>
13271354 except Exception as e: # pragma: no cover
13281355 return _kube_api_error_handling(e)
13291356
1357+ def _throw_for_no_raycluster(self):
1358+ api_instance = client.CustomObjectsApi(api_config_handler())
1359+ try:
1360+ api_instance.list_namespaced_custom_object(
1361+ group="ray.io",
1362+ version="v1",
1363+ namespace=self.config.namespace,
1364+ plural="rayclusters",
1365+ )
1366+ except ApiException as e:
1367+ if e.status == 404:
1368+ raise RuntimeError(
1369+ "RayCluster CustomResourceDefinition unavailable contact your administrator."
1370+ )
1371+ else:
1372+ raise RuntimeError(
1373+ "Failed to get RayCluster CustomResourceDefinition: " + str(e)
1374+ )
1375+
13301376 def down(self):
13311377 """
13321378 Deletes the AppWrapper yaml, scaling-down and deleting all resources
13331379 associated with the cluster.
13341380 """
13351381 namespace = self.config.namespace
1382+ self._throw_for_no_raycluster()
13361383 try:
13371384 config_check()
13381385 api_instance = client.CustomObjectsApi(api_config_handler())
@@ -1569,26 +1616,18 @@ <h2 class="section-title" id="header-classes">Classes</h2>
15691616 namespace=rc["metadata"]["namespace"],
15701617 machine_types=machine_types,
15711618 num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
1572- min_cpus=int(
1573- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1574- "resources"
1575- ]["requests"]["cpu"]
1576- ),
1577- max_cpus=int(
1578- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1579- "resources"
1580- ]["limits"]["cpu"]
1581- ),
1582- min_memory=int(
1583- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1584- "resources"
1585- ]["requests"]["memory"][:-1]
1586- ),
1587- max_memory=int(
1588- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1589- "resources"
1590- ]["limits"]["memory"][:-1]
1591- ),
1619+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1620+ "containers"
1621+ ][0]["resources"]["requests"]["cpu"],
1622+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1623+ "containers"
1624+ ][0]["resources"]["limits"]["cpu"],
1625+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1626+ "containers"
1627+ ][0]["resources"]["requests"]["memory"],
1628+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1629+ "containers"
1630+ ][0]["resources"]["limits"]["memory"],
15921631 num_gpus=int(
15931632 rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
15941633 "resources"
@@ -1811,6 +1850,7 @@ <h3>Methods</h3>
18111850 write_to_file = self.config.write_to_file
18121851 verify_tls = self.config.verify_tls
18131852 local_queue = self.config.local_queue
1853+ labels = self.config.labels
18141854 return generate_appwrapper(
18151855 name=name,
18161856 namespace=namespace,
@@ -1835,6 +1875,7 @@ <h3>Methods</h3>
18351875 write_to_file=write_to_file,
18361876 verify_tls=verify_tls,
18371877 local_queue=local_queue,
1878+ labels=labels,
18381879 )</ code > </ pre >
18391880</ details >
18401881</ dd >
@@ -1870,6 +1911,7 @@ <h3>Methods</h3>
18701911 associated with the cluster.
18711912 """
18721913 namespace = self.config.namespace
1914+ self._throw_for_no_raycluster()
18731915 try:
18741916 config_check()
18751917 api_instance = client.CustomObjectsApi(api_config_handler())
@@ -1944,26 +1986,18 @@ <h3>Methods</h3>
19441986 namespace=rc["metadata"]["namespace"],
19451987 machine_types=machine_types,
19461988 num_workers=rc["spec"]["workerGroupSpecs"][0]["minReplicas"],
1947- min_cpus=int(
1948- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1949- "resources"
1950- ]["requests"]["cpu"]
1951- ),
1952- max_cpus=int(
1953- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1954- "resources"
1955- ]["limits"]["cpu"]
1956- ),
1957- min_memory=int(
1958- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1959- "resources"
1960- ]["requests"]["memory"][:-1]
1961- ),
1962- max_memory=int(
1963- rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
1964- "resources"
1965- ]["limits"]["memory"][:-1]
1966- ),
1989+ min_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1990+ "containers"
1991+ ][0]["resources"]["requests"]["cpu"],
1992+ max_cpus=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1993+ "containers"
1994+ ][0]["resources"]["limits"]["cpu"],
1995+ min_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1996+ "containers"
1997+ ][0]["resources"]["requests"]["memory"],
1998+ max_memory=rc["spec"]["workerGroupSpecs"][0]["template"]["spec"][
1999+ "containers"
2000+ ][0]["resources"]["limits"]["memory"],
19672001 num_gpus=int(
19682002 rc["spec"]["workerGroupSpecs"][0]["template"]["spec"]["containers"][0][
19692003 "resources"
@@ -2168,6 +2202,10 @@ <h3>Methods</h3>
21682202 Applies the AppWrapper yaml, pushing the resource request onto
21692203 the MCAD queue.
21702204 """
2205+
2206+ # check if RayCluster CustomResourceDefinition exists if not throw RuntimeError
2207+ self._throw_for_no_raycluster()
2208+
21712209 namespace = self.config.namespace
21722210
21732211 try:
0 commit comments