Skip to content

Commit 1fea9d0

Browse files
ethanwharris authored and lexierule committed
[App] Fix idle timeout e2e (#16786)
1 parent a27af0f commit 1fea9d0

File tree

2 files changed

+19
-17
lines changed
  • src/lightning_app/core
  • tests/integrations_app/apps/idle_timeout

2 files changed

+19
-17
lines changed

src/lightning_app/core/work.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,10 @@ def _aggregate_status_timeout(self, statuses: List[Dict]) -> WorkStatus:
639639
return WorkStatus(**status, count=len(timeout_statuses))
640640

641641
def on_exit(self):
642-
"""Override this hook to add your logic when the work is exiting."""
642+
"""Override this hook to add your logic when the work is exiting.
643+
644+
Note: This hook is not guaranteed to be called when running in the cloud.
645+
"""
643646
pass
644647

645648
def stop(self):

tests/integrations_app/apps/idle_timeout/app.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from lightning_app import CloudCompute, LightningApp, LightningFlow, LightningWork
44
from lightning_app.storage.path import _artifacts_path, _filesystem
5-
from lightning_app.utilities.enum import WorkStageStatus, WorkStopReasons
5+
from lightning_app.utilities.enum import WorkStageStatus
66

77

88
class SourceFileWriterWork(LightningWork):
@@ -35,22 +35,21 @@ def run(self):
3535
if self.work.counter == 0:
3636
self.work.run()
3737

38-
elif (
39-
self.work.status.stage == WorkStageStatus.STOPPED
40-
and self.work.status.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
41-
and self.make_check
42-
):
43-
succeeded_status = self.work.statuses[-3]
44-
stopped_status_pending = self.work.statuses[-2]
45-
stopped_status_sigterm = self.work.statuses[-1]
46-
assert succeeded_status.stage == WorkStageStatus.SUCCEEDED
47-
assert stopped_status_pending.stage == WorkStageStatus.STOPPED
48-
assert stopped_status_pending.reason == WorkStopReasons.PENDING
49-
assert stopped_status_sigterm.stage == WorkStageStatus.STOPPED
50-
assert stopped_status_sigterm.reason == WorkStopReasons.SIGTERM_SIGNAL_HANDLER
38+
elif self.work.status.stage == WorkStageStatus.STOPPED and self.make_check:
39+
succeeded_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.SUCCEEDED]
40+
# Ensure the work succeeded at some point
41+
assert len(succeeded_statuses) > 0
42+
succeeded_status = succeeded_statuses[-1]
43+
44+
stopped_statuses = [status for status in self.work.statuses if status.stage == WorkStageStatus.STOPPED]
45+
46+
# We want to check that the work started shutting down within the required timeframe, so we take the first
47+
# status that has `stage == STOPPED`.
48+
stopped_status = stopped_statuses[0]
49+
5150
# Note: Account for the controlplane, k8s, SIGTERM handler delays.
52-
assert (stopped_status_pending.timestamp - succeeded_status.timestamp) < 20
53-
assert (stopped_status_sigterm.timestamp - stopped_status_pending.timestamp) < 120
51+
assert (stopped_status.timestamp - succeeded_status.timestamp) < 20
52+
5453
fs = _filesystem()
5554
destination_path = _artifacts_path(self.work) / pathlib.Path(*self.work.path.resolve().parts[1:])
5655
assert fs.exists(destination_path)

0 commit comments

Comments
 (0)