|
9 | 9 | import pytest |
10 | 10 | from frequenz.channels import Broadcast, Receiver, Sender, select, selected_from |
11 | 11 |
|
12 | | -from frequenz.sdk.actor import Actor, run |
| 12 | +from frequenz.sdk.actor import Actor, RestartActorException, run |
13 | 13 |
|
14 | 14 | from ..conftest import actor_restart_limit |
15 | 15 |
|
@@ -78,6 +78,32 @@ async def _run(self) -> None: |
78 | 78 | print(f"{self} done (should not happen)") |
79 | 79 |
|
80 | 80 |
|
class RaiseRestartExceptionActor(BaseTestActor):
    """Actor that raises a RestartActorException when it receives a message.

    The exception is raised once per received message until ``raise_count``
    raises have happened; the next message makes the run loop exit normally.
    """

    def __init__(self, recv: Receiver[int], raise_count: int) -> None:
        """Create an instance.

        Args:
            recv: A channel receiver for int data.
            raise_count: How many times to raise a RestartActorException
                before finishing normally.
        """
        super().__init__(name="test")
        self._recv = recv
        self._raise_count = raise_count

    async def _run(self) -> None:
        """Run the actor, raising a RestartActorException per received message."""
        print(f"{self} started")
        # Track restarts so the test can assert how often _run() was entered.
        self.inc_restart_count()
        async for _ in self._recv:
            if self._raise_count > 0:
                self._raise_count -= 1
                raise RestartActorException("Actor should restarts")
            break
        print(f"{self} done")

# (logger name, numeric log level) pairs used to build expected
# caplog.record_tuples entries; 20 == logging.INFO, 40 == logging.ERROR.
ACTOR_INFO = ("frequenz.sdk.actor._actor", 20)
ACTOR_ERROR = ("frequenz.sdk.actor._actor", 40)
RUN_INFO = ("frequenz.sdk.actor._run_utils", 20)
@@ -256,6 +282,82 @@ async def test_restart_on_unhandled_exception( |
256 | 282 | assert filtered_logs == expected_log |
257 | 283 |
|
258 | 284 |
|
@pytest.mark.parametrize("restart_num", [1])
async def test_restart_on_restart_exception(
    restart_num: int, caplog: pytest.LogCaptureFixture
) -> None:
    """Create a faulty actor and expect it to restart because it raises an exception.

    Also test this works with different restart limits.

    Args:
        restart_num: The restart limit to use.
        caplog: The log capture fixture.
    """
    relevant_loggers = {"frequenz.sdk.actor._actor", "frequenz.sdk.actor._run_utils"}
    for logger in relevant_loggers:
        caplog.set_level("DEBUG", logger=logger)

    channel: Broadcast[int] = Broadcast(name="channel")

    # We need some timeout, 1 second for each restart should be enough.
    # There should be no restart delay.
    expected_wait_time = timedelta(seconds=restart_num + 1.0)
    async with asyncio.timeout(expected_wait_time.total_seconds()):
        actor = RaiseRestartExceptionActor(
            channel.new_receiver(),
            raise_count=restart_num,
        )
        # One sender is enough for all messages; don't create a new one per send.
        sender = channel.new_sender()
        # Send one extra message so the actor finishes normally after the
        # last restart.
        for i in range(restart_num + 1):
            await sender.send(i)

        await run(actor)
        await actor.wait()

    assert actor.is_running is False
    assert BaseTestActor.restart_count == restart_num
    expected_log = [
        (*RUN_INFO, "Starting 1 actor(s)..."),
        (*RUN_INFO, "Actor RaiseRestartExceptionActor[test]: Starting..."),
        (*ACTOR_INFO, "Actor RaiseRestartExceptionActor[test]: Started."),
    ]
    # One "Restarting." entry is expected per raised RestartActorException.
    expected_log.extend(
        (*ACTOR_INFO, "Actor RaiseRestartExceptionActor[test]: Restarting.")
        for _ in range(restart_num)
    )
    expected_log.extend(
        [
            (
                *ACTOR_INFO,
                "Actor RaiseRestartExceptionActor[test]: _run() returned without error.",
            ),
            (
                *ACTOR_INFO,
                "Actor RaiseRestartExceptionActor[test]: Stopped.",
            ),
            (
                *RUN_INFO,
                "Actor RaiseRestartExceptionActor[test]: Finished normally.",
            ),
            (
                *RUN_INFO,
                "All 1 actor(s) finished.",
            ),
        ]
    )
    print("caplog.record_tuples:", caplog.record_tuples)
    # This is an ugly hack. There seem to be some issues with asyncio and caplog, maybe
    # pytest-asyncio, that reports some pending tasks from an unrelated test when tested
    # inside QEMU (suggesting also some timing issue when things run very slow).
    filtered_logs = [r for r in caplog.record_tuples if r[0] in relevant_loggers]
    print("filtered_logs:", filtered_logs)
    print("expected_log:", expected_log)
    assert filtered_logs == expected_log


259 | 361 | async def test_does_not_restart_on_normal_exit( |
260 | 362 | actor_auto_restart_once: None, # pylint: disable=unused-argument |
261 | 363 | caplog: pytest.LogCaptureFixture, |
|
0 commit comments