|
17 | 17 | """ |
18 | 18 | import os |
19 | 19 | from time import sleep |
| 20 | +import time |
20 | 21 | import codeflare_sdk |
21 | 22 | from kubernetes import client |
| 23 | +from kubernetes.client.rest import ApiException |
22 | 24 | import ipywidgets as widgets |
23 | 25 | from IPython.display import display, HTML, Javascript |
24 | 26 | import pandas as pd |
@@ -160,7 +162,6 @@ def _on_delete_button_click(b, classification_widget: widgets.ToggleButtons, df: |
160 | 162 |
|
161 | 163 | _delete_cluster(cluster_name, namespace) |
162 | 164 |
|
163 | | - sleep(2) # TODO: wait for the cluster to be deleted instead |
164 | 165 | with user_output: |
165 | 166 | user_output.clear_output() |
166 | 167 | print(f"Cluster {cluster_name} in the {namespace} namespace was deleted successfully.") |
@@ -209,34 +210,59 @@ def _on_list_jobs_button_click(b, classification_widget: widgets.ToggleButtons, |
def _delete_cluster(
    cluster_name: str,
    namespace: str,
    timeout: int = 5,
    interval: int = 1,
):
    """Delete a cluster's custom resource and wait until it is gone.

    If ``_check_aw_exists`` reports an AppWrapper for ``cluster_name`` in
    ``namespace``, the ``appwrappers`` resource
    (``workload.codeflare.dev/v1beta2``) is deleted; otherwise the
    ``rayclusters`` resource (``ray.io/v1``) is deleted. The function then
    polls the same resource until the API answers with HTTP 404.

    Args:
        cluster_name: Name of the custom resource to delete.
        namespace: Namespace the resource lives in.
        timeout: Maximum number of seconds to wait for the resource to
            disappear after the delete request has been issued.
        interval: Number of seconds to sleep between existence checks.

    Returns:
        ``None`` when the resource was deleted in time. On any failure
        (including the timeout) the exception is passed to
        ``_kube_api_error_handling`` and whatever that helper returns is
        returned; NOTE(review): the helper is defined elsewhere and may
        re-raise instead of returning -- confirm against its definition.
    """
    # Function-scope import kept from the original; presumably avoids a
    # circular import with the sibling ``cluster`` module -- TODO confirm.
    from .cluster import _check_aw_exists

    try:
        config_check()
        api_instance = client.CustomObjectsApi(api_config_handler())

        # Select the resource coordinates once so the delete call and the
        # polling call are guaranteed to target the same object.
        if _check_aw_exists(cluster_name, namespace):
            group, version, plural = "workload.codeflare.dev", "v1beta2", "appwrappers"
        else:
            group, version, plural = "ray.io", "v1", "rayclusters"

        resource = {
            "group": group,
            "version": version,
            "namespace": namespace,
            "plural": plural,
            "name": cluster_name,
        }

        api_instance.delete_namespaced_custom_object(**resource)

        # Wait for the resource to be deleted. A monotonic deadline bounds
        # the wait even when ``interval`` is 0 and is unaffected by
        # wall-clock adjustments.
        deadline = time.monotonic() + timeout
        while True:
            try:
                api_instance.get_namespaced_custom_object(**resource)
            except ApiException as e:
                if e.status == 404:
                    # Resource is deleted.
                    break
                # Any other API failure (403, 500, ...) must not be
                # swallowed: retrying immediately would spin forever without
                # sleeping. Hand it to the outer handler instead.
                raise

            # Resource still exists: give up once the budget is spent,
            # otherwise pause before checking again.
            if time.monotonic() >= deadline:
                raise TimeoutError(f"Timeout waiting for {cluster_name} to be deleted.")
            time.sleep(interval)
    except Exception as e:
        return _kube_api_error_handling(e)
240 | 266 |
|
241 | 267 |
|
242 | 268 | def _fetch_cluster_data(namespace): |
|
0 commit comments