Skip to content

Commit 4b849a6

Browse files
Tony Kao authored and facebook-github-bot committed
torchx support early validation before workspace build
Summary: Add a `_pre_build_validate()` hook to the torchx scheduler so that app validation can be performed before the build-workspace step. This earlier validation makes it possible to detect issues sooner, without waiting for the workspace build to complete. This change only exposes the `_pre_build_validate()` hook on the torchx scheduler; there is no change to existing behavior (validation will continue to be performed after the workspace build, if required). A subsequent change will update the behavior within specific schedulers where validation can be moved from `_validate` to `_pre_build_validate`. Additionally, this change adds event logging for the `build_workspace_and_update_role` call. Differential Revision: D69463377
1 parent 126fc80 commit 4b849a6

File tree

2 files changed

+25
-4
lines changed

2 files changed

+25
-4
lines changed

torchx/runner/api.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,14 @@ def dryrun(
412412
):
413413
sched = self._scheduler(scheduler)
414414
resolved_cfg = sched.run_opts().resolve(cfg)
415+
416+
# early validation before build workspace
417+
with log_event(
418+
"pre_build_validate",
419+
scheduler,
420+
):
421+
sched._pre_build_validate(app, scheduler, resolved_cfg)
422+
415423
if workspace and isinstance(sched, WorkspaceMixin):
416424
role = app.roles[0]
417425
old_img = role.image
@@ -420,7 +428,13 @@ def dryrun(
420428
logger.info(
421429
'To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.'
422430
)
423-
sched.build_workspace_and_update_role(role, workspace, resolved_cfg)
431+
with log_event(
432+
"build_workspace_and_update_role",
433+
scheduler,
434+
) as ctx:
435+
sched.build_workspace_and_update_role(role, workspace, resolved_cfg)
436+
ctx._torchx_event.app_image = role.image
437+
ctx._torchx_event.workspace = workspace
424438

425439
if old_img != role.image:
426440
logger.info(

torchx/schedulers/api.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -337,12 +337,19 @@ def log_iter(
337337
f"{self.__class__.__qualname__} does not support application log iteration"
338338
)
339339

340+
def _pre_build_validate(self, app: AppDef, scheduler: str, cfg: T) -> None:
341+
"""
342+
validates before workspace build whether application is consistent with the scheduler.
343+
344+
Raises error if application is not compatible with scheduler
345+
"""
346+
pass
347+
340348
def _validate(self, app: AppDef, scheduler: str, cfg: T) -> None:
341349
"""
342-
Validates whether application is consistent with the scheduler.
350+
Validates after workspace build whether application is consistent with the scheduler.
343351
344-
Raises:
345-
ValueError: if application is not compatible with scheduler
352+
Raises error if application is not compatible with scheduler
346353
"""
347354
for role in app.roles:
348355
if role.resource == NULL_RESOURCE:

0 commit comments

Comments
 (0)