Skip to content

Commit 08dcc16

Browse files
committed
Add Lepton RayCluster tests
Signed-Off-By: Robert Clark <[email protected]>
1 parent 9c02ff5 commit 08dcc16

File tree

2 files changed: +676 -6 lines changed

2 files changed: +676 -6 lines changed

nemo_run/run/ray/lepton.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,7 @@
2222
import warnings
2323
from dataclasses import dataclass
2424
from ray.job_submission import JobSubmissionClient
25-
from rich.pretty import Pretty
25+
from rich.pretty import pretty_repr
2626
from typing import Any, Optional, TypeAlias
2727

2828
from leptonai.api.v1.types.affinity import LeptonResourceAffinity
@@ -134,7 +134,7 @@ def status(
134134

135135
if display and status_dict:
136136
cluster = client.raycluster.get(self.name)
137-
logger.info(Pretty(cluster))
137+
logger.info(pretty_repr(vars(cluster), expand_all=True))
138138

139139
return status_dict
140140

@@ -176,6 +176,7 @@ def create(
176176
if len(name) > 35:
177177
logger.warning("length of name exceeds 35 characters. Shortening...")
178178
name = name[:34]
179+
self.name = name
179180

180181
executor = self.executor
181182
client = APIClient()
@@ -305,15 +306,15 @@ def delete(
305306
Returns
306307
-------
307308
bool
308-
*True* if the job was successfully cancelled (or already gone), *False* otherwise.
309+
*True* if the cluster was confirmed deleted (or already gone), *False* otherwise.
309310
"""
310311
name = self.name
311312
logger.debug(f"Deleting RayCluster '{name}'")
312313

313314
client = APIClient()
314315
status = self._status(client)
315316

316-
if status["cluster_name"] is None:
317+
if status is None or status["cluster_name"] is None:
317318
logger.warning(f"RayCluster '{name}' does not exist or is already deleted")
318319
return True
319320

@@ -331,6 +332,10 @@ def delete(
331332

332333
if not status or status["cluster_name"] is None:
333334
logger.info(f"RayCluster '{name}' successfully deleted")
335+
336+
if name in self.cluster_map:
337+
del self.cluster_map[name]
338+
334339
return True
335340

336341
logger.debug(f"RayCluster '{name}' is still being deleted, waiting...")
@@ -443,8 +448,6 @@ def _ray_client(self, create_if_not_exists: bool = False) -> APIClient:
443448
cluster = LeptonRayCluster(
444449
name=name,
445450
executor=self.executor,
446-
ray_version=self.executor.ray_version,
447-
head_resource_shape=self.executor.head_resource_shape,
448451
)
449452
cluster.create()
450453
logger.info(f"Waiting for RayCluster '{name}' to be ready...")

0 commit comments

Comments
 (0)