Skip to content

Commit fc352b2

Browse files
authored
check for nursery misnesting on task exit (#3307)
* check for nursery misnesting on task exit ... not working * forcefully abort children of unclosed nurseries to avoid internalerror * call Runner.task_exited on aborted tasks to get errors for aborted tasks and handle nested scenarios (needs test) * new approach * suppress Cancelled if a CancelScope is abandoned by misnesting * add example of losing exception from abandoned task * add newsfragment
1 parent 5bfba27 commit fc352b2

File tree

4 files changed

+159
-6
lines changed

4 files changed

+159
-6
lines changed

newsfragments/3307.misc.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
When misnesting nurseries you now get a helpful :exc:`RuntimeError` instead of a catastrophic :exc:`TrioInternalError`.

src/trio/_core/_run.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -678,6 +678,9 @@ def _close(self, exc: BaseException | None) -> BaseException | None:
678678
exc is not None
679679
and self._cancel_status.effectively_cancelled
680680
and not self._cancel_status.parent_cancellation_is_visible_to_us
681+
) or (
682+
scope_task._cancel_status is not self._cancel_status
683+
and self._cancel_status.abandoned_by_misnesting
681684
):
682685
if isinstance(exc, Cancelled):
683686
self.cancelled_caught = True
@@ -1261,6 +1264,9 @@ def _child_finished(
12611264
outcome: Outcome[object],
12621265
) -> None:
12631266
self._children.remove(task)
1267+
if self._closed and not hasattr(self, "_pending_excs"):
1268+
# We're abandoned by misnested nurseries, the result of the task is lost.
1269+
return
12641270
if isinstance(outcome, Error):
12651271
self._add_exc(
12661272
outcome.error,
@@ -1321,7 +1327,7 @@ def aborted(raise_cancel: _core.RaiseCancelT) -> Abort:
13211327
self._add_exc(exc, reason=None)
13221328

13231329
popped = self._parent_task._child_nurseries.pop()
1324-
assert popped is self
1330+
assert popped is self, "Nursery misnesting detected!"
13251331
if self._pending_excs:
13261332
try:
13271333
if not self._strict_exception_groups and len(self._pending_excs) == 1:
@@ -2007,6 +2013,17 @@ async def python_wrapper(orig_coro: Awaitable[RetT]) -> RetT:
20072013
return task
20082014

20092015
def task_exited(self, task: Task, outcome: Outcome[object]) -> None:
2016+
if task._child_nurseries:
2017+
for nursery in task._child_nurseries:
2018+
nursery.cancel_scope._cancel(
2019+
CancelReason(
2020+
source="nursery",
2021+
reason="Parent Task exited prematurely, abandoning this nursery without exiting it properly.",
2022+
source_task=repr(task),
2023+
)
2024+
)
2025+
nursery._closed = True
2026+
20102027
# break parking lots associated with the exiting task
20112028
if task in GLOBAL_PARKING_LOT_BREAKER:
20122029
for lot in GLOBAL_PARKING_LOT_BREAKER[task]:
@@ -2017,7 +2034,8 @@ def task_exited(self, task: Task, outcome: Outcome[object]) -> None:
20172034
task._cancel_status is not None
20182035
and task._cancel_status.abandoned_by_misnesting
20192036
and task._cancel_status.parent is None
2020-
):
2037+
) or task._child_nurseries:
2038+
reason = "Nursery" if task._child_nurseries else "Cancel scope"
20212039
# The cancel scope surrounding this task's nursery was closed
20222040
# before the task exited. Force the task to exit with an error,
20232041
# since the error might not have been caught elsewhere. See the
@@ -2026,7 +2044,7 @@ def task_exited(self, task: Task, outcome: Outcome[object]) -> None:
20262044
# Raise this, rather than just constructing it, to get a
20272045
# traceback frame included
20282046
raise RuntimeError(
2029-
"Cancel scope stack corrupted: cancel scope surrounding "
2047+
f"{reason} stack corrupted: {reason} surrounding "
20302048
f"{task!r} was closed before the task exited\n{MISNESTING_ADVICE}",
20312049
)
20322050
except RuntimeError as new_exc:

src/trio/_core/_tests/test_run.py

Lines changed: 136 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,13 @@
88
import time
99
import types
1010
import weakref
11-
from contextlib import ExitStack, contextmanager, suppress
11+
from contextlib import (
12+
AsyncExitStack,
13+
ExitStack,
14+
asynccontextmanager,
15+
contextmanager,
16+
suppress,
17+
)
1218
from math import inf, nan
1319
from typing import TYPE_CHECKING, NoReturn, TypeVar
1420
from unittest import mock
@@ -761,7 +767,7 @@ async def enter_scope() -> None:
761767
assert scope.cancel_called # never become un-cancelled
762768

763769

764-
async def test_cancel_scope_misnesting() -> None:
770+
async def test_cancel_scope_misnesting_1() -> None:
765771
outer = _core.CancelScope()
766772
inner = _core.CancelScope()
767773
with ExitStack() as stack:
@@ -771,6 +777,8 @@ async def test_cancel_scope_misnesting() -> None:
771777
stack.close()
772778
# No further error is raised when exiting the inner context
773779

780+
781+
async def test_cancel_scope_misnesting_2() -> None:
774782
# If there are other tasks inside the abandoned part of the cancel tree,
775783
# they get cancelled when the misnesting is detected
776784
async def task1() -> None:
@@ -828,6 +836,8 @@ def no_context(exc: RuntimeError) -> bool:
828836
)
829837
assert group.matches(exc_info.value.__context__)
830838

839+
840+
async def test_cancel_scope_misnesting_3() -> None:
831841
# Trying to exit a cancel scope from an unrelated task raises an error
832842
# without affecting any state
833843
async def task3(task_status: _core.TaskStatus[_core.CancelScope]) -> None:
@@ -844,6 +854,130 @@ async def task3(task_status: _core.TaskStatus[_core.CancelScope]) -> None:
844854
scope.cancel()
845855

846856

857+
# helper to check we're not outputting overly verbose tracebacks
858+
def no_cause_or_context(e: BaseException) -> bool:
859+
return e.__cause__ is None and e.__context__ is None
860+
861+
862+
async def test_nursery_misnest() -> None:
863+
# See https://github.com/python-trio/trio/issues/3298
864+
async def inner_func() -> None:
865+
inner_nursery = await inner_cm.__aenter__()
866+
inner_nursery.start_soon(sleep, 1)
867+
868+
with pytest.RaisesGroup(
869+
pytest.RaisesExc(
870+
RuntimeError, match="Nursery stack corrupted", check=no_cause_or_context
871+
),
872+
check=no_cause_or_context,
873+
):
874+
async with _core.open_nursery() as outer_nursery:
875+
inner_cm = _core.open_nursery()
876+
outer_nursery.start_soon(inner_func)
877+
878+
879+
def test_nursery_nested_child_misnest() -> None:
880+
# Note that this example does *not* raise an exception group.
881+
async def main() -> None:
882+
async with _core.open_nursery():
883+
inner_cm = _core.open_nursery()
884+
await inner_cm.__aenter__()
885+
886+
with pytest.raises(RuntimeError, match="Nursery stack corrupted") as excinfo:
887+
_core.run(main)
888+
assert excinfo.value.__cause__ is None
889+
# This AssertionError is kind of redundant, but I don't think we want to remove
890+
# the assertion and don't think we care enough to suppress it in this specific case.
891+
assert pytest.RaisesExc(
892+
AssertionError, match="^Nursery misnesting detected!$"
893+
).matches(excinfo.value.__context__)
894+
assert excinfo.value.__context__.__cause__ is None
895+
assert excinfo.value.__context__.__context__ is None
896+
897+
898+
async def test_asyncexitstack_nursery_misnest() -> None:
899+
# This example is trickier than the above ones, and is the one that requires
900+
# special logic of abandoned nurseries to avoid nasty internal errors that masks
901+
# the RuntimeError.
902+
@asynccontextmanager
903+
async def asynccontextmanager_that_creates_a_nursery_internally() -> (
904+
AsyncGenerator[None]
905+
):
906+
async with _core.open_nursery() as nursery:
907+
await nursery.start(started_sleeper)
908+
nursery.start_soon(unstarted_task)
909+
yield
910+
911+
async def started_sleeper(task_status: _core.TaskStatus[None]) -> None:
912+
task_status.started()
913+
await sleep_forever()
914+
915+
async def unstarted_task() -> None:
916+
await _core.checkpoint()
917+
918+
with pytest.RaisesGroup(
919+
pytest.RaisesGroup(
920+
pytest.RaisesExc(
921+
RuntimeError, match="Nursery stack corrupted", check=no_cause_or_context
922+
),
923+
check=no_cause_or_context,
924+
),
925+
check=no_cause_or_context,
926+
):
927+
async with AsyncExitStack() as stack, _core.open_nursery() as nursery:
928+
# The asynccontextmanager is going to create a nursery that outlives this nursery!
929+
nursery.start_soon(
930+
stack.enter_async_context,
931+
asynccontextmanager_that_creates_a_nursery_internally(),
932+
)
933+
934+
935+
def test_asyncexitstack_nursery_misnest_cleanup() -> None:
936+
# We guarantee that abandoned tasks get to do cleanup *eventually*, but exceptions
937+
# are lost. With more effort it's possible we could reschedule child tasks to exit
938+
# promptly.
939+
finally_entered = []
940+
941+
async def main() -> None:
942+
async def unstarted_task() -> None:
943+
try:
944+
await _core.checkpoint()
945+
finally:
946+
finally_entered.append(True)
947+
raise ValueError("this exception is lost")
948+
949+
# rest of main() is ~identical to the above test
950+
@asynccontextmanager
951+
async def asynccontextmanager_that_creates_a_nursery_internally() -> (
952+
AsyncGenerator[None]
953+
):
954+
async with _core.open_nursery() as nursery:
955+
nursery.start_soon(unstarted_task)
956+
yield
957+
958+
with pytest.RaisesGroup(
959+
pytest.RaisesGroup(
960+
pytest.RaisesExc(
961+
RuntimeError,
962+
match="Nursery stack corrupted",
963+
check=no_cause_or_context,
964+
),
965+
check=no_cause_or_context,
966+
),
967+
check=no_cause_or_context,
968+
):
969+
async with AsyncExitStack() as stack, _core.open_nursery() as nursery:
970+
# The asynccontextmanager is going to create a nursery that outlives this nursery!
971+
nursery.start_soon(
972+
stack.enter_async_context,
973+
asynccontextmanager_that_creates_a_nursery_internally(),
974+
)
975+
assert not finally_entered # abandoned task still hasn't been cleaned up
976+
977+
_core.run(main)
978+
assert finally_entered # now it has
979+
980+
847981
@slow
848982
async def test_timekeeping() -> None:
849983
# probably a good idea to use a real clock for *one* test anyway...

test-requirements.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# For tests
2-
pytest >= 5.0 # for faulthandler in core
2+
pytest >= 8.4 # for pytest.RaisesGroup
33
coverage >= 7.2.5
44
async_generator >= 1.9
55
pyright

0 commit comments

Comments
 (0)