Skip to content

Commit 365fb0d

Browse files
Add a restart delay to actors to avoid spam-restarting of buggy actors
Signed-off-by: Christian Parpart <[email protected]>
1 parent 3562004 commit 365fb0d

File tree

3 files changed

+40
-2
lines changed

3 files changed

+40
-2
lines changed

RELEASE_NOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ This version ships an experimental version of the **Power Manager**, adds prelim
2525

2626
- Move `microgrid.ComponentGraph` class to `microgrid.component_graph.ComponentGraph`, exposing only the high level interface functions through the `microgrid` package.
2727

28+
- A crashing actor is no longer restarted instantly; an artificial delay is now applied before each restart to avoid potential spam-restarting.
29+
2830
## New Features
2931

3032
- New and improved documentation.

src/frequenz/sdk/actor/_actor.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,20 @@ def start(self) -> None:
5454
async def _run(self) -> None:
5555
"""Run this actor's logic."""
5656

57+
async def _delay_if_restart(self, iteration: int) -> None:
58+
"""Delay the restart of this actor's n'th iteration.
59+
60+
Args:
61+
iteration: The current iteration of the restart.
62+
"""
63+
# NB: I think it makes sense (in the future) to think about deminishing returns
64+
# the longer the actor has been running.
65+
# Not just for the restart-delay but actually for the n_restarts counter as well.
66+
if iteration > 0:
67+
delay: int = 1 << iteration # delay for 1, 2, 4, 8, ... seconds
68+
_logger.info("Actor %s: Waiting %s seconds...", self, delay)
69+
await asyncio.sleep(delay)
70+
5771
async def _run_loop(self) -> None:
5872
"""Run this actor's task in a loop until `_restart_limit` is reached.
5973
@@ -67,6 +81,7 @@ async def _run_loop(self) -> None:
6781
n_restarts = 0
6882
while True:
6983
try:
84+
await self._delay_if_restart(n_restarts)
7085
await self._run()
7186
_logger.info("Actor %s: _run() returned without error.", self)
7287
except asyncio.CancelledError:

tests/actor/test_actor.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,21 @@ async def test_basic_actor(caplog: pytest.LogCaptureFixture) -> None:
194194
]
195195

196196

197-
@pytest.mark.parametrize("restart_limit", [0, 1, 2, 10])
197+
def expected_wait_time(iterations: int) -> int:
    """Calculate the expected total wait time for a given number of restarts.

    The n'th restart is expected to be delayed by `1 << n` seconds, so the
    total is the sum of those delays for restarts 1 through `iterations`
    (2, 6, 14, ... seconds for 1, 2, 3, ... restarts).

    Args:
        iterations: The number of restarts to accumulate the wait time for.

    Returns:
        The expected wait time in seconds; 0 when `iterations` is 0 or negative.
    """
    # Summing explicitly avoids the precedence trap of `a + 1 << n`, which
    # Python parses as `(a + 1) << n` because `+` binds tighter than `<<`.
    return sum(1 << restart for restart in range(1, iterations + 1))
209+
210+
211+
@pytest.mark.parametrize("restart_limit", [0, 1, 2, 3])
198212
async def test_restart_on_unhandled_exception(
199213
restart_limit: int, caplog: pytest.LogCaptureFixture
200214
) -> None:
@@ -211,7 +225,8 @@ async def test_restart_on_unhandled_exception(
211225

212226
channel: Broadcast[int] = Broadcast("channel")
213227

214-
async with asyncio.timeout(2.0):
228+
async with asyncio.timeout(1 + expected_wait_time(restart_limit)):
229+
print(f"Expecting a wait time of {expected_wait_time(restart_limit)} seconds")
215230
with actor_restart_limit(restart_limit):
216231
actor = RaiseExceptionActor(
217232
channel.new_receiver(),
@@ -240,6 +255,10 @@ async def test_restart_on_unhandled_exception(
240255
*ACTOR_INFO,
241256
f"Actor test: Restarting ({i}/{restart_limit})...",
242257
),
258+
(
259+
*ACTOR_INFO,
260+
f"Actor RaiseExceptionActor[test]: Waiting {1 << (1 + i)} seconds...",
261+
),
243262
]
244263
)
245264
expected_log.extend(
@@ -260,6 +279,8 @@ async def test_restart_on_unhandled_exception(
260279
(*RUN_INFO, "All 1 actor(s) finished."),
261280
]
262281
)
282+
print("expected_log:", expected_log)
283+
print("caplog.record_tuples:", caplog.record_tuples)
263284
assert caplog.record_tuples == expected_log
264285

265286

0 commit comments

Comments
 (0)