Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 15 additions & 7 deletions src/trio/_core/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,27 +73,30 @@ class Cancelled(BaseException, metaclass=NoPublicConstructor):

"""

source: Literal["deadline", "nursery", "explicit", "unknown", "KeyboardInterrupt"]
# TODO: this should probably be a Task?
source: Literal[
"KeyboardInterrupt", "deadline", "explicit", "nursery", "shutdown", "unknown"
]
# repr(Task), so as to avoid gc troubles from holding a reference
source_task: str | None = None
Copy link
Contributor

@A5rocks A5rocks Apr 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure repr(Task) is actually that useful? Like yes, it says what function this was started in and maybe you can locate the task through some gc machinery through the id, but...

Maybe a weakref would be more useful so people can access attributes? I feel like "where was I spawned" is already answered by the stack trace. (nevermind, just remembered this is the canceller not the cancellee)


Nevermind, I didn't think this suggestion through. A weakref wouldn't work for the common case (a task cancelling a sibling task).

I'm not convinced a strong ref here would be bad -- a Task doesn't store the exception or the cancellation reason so there's no reference cycle I think? But a string here is fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if a Task contains a CancelScope and the scope gets cancelled within the same task, the scope will then have a strong ref to a CancelReason which will then point back to the Task. I think?

the repr(Task) on its own is perhaps not super useful, but in case you have multiple cancellations going on at the same time that are only distinguished by the source task then you can visually distinguish them even without other sources of the task id.
Though it does also contain the name of the function itself:
<Task 'trio._core._tests.test_run.test_Cancelled_str' at 0x\w*>
which could be very helpful if you have different functions spawned in a nursery.

Copy link
Contributor

@A5rocks A5rocks May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I guess task -> parent/child nursery -> cancel scope -> cancel reason -> task is a loop, yeah. That's annoying. (Or maybe CoroutineType stores frames as a strongref? That too.)

reason: str | None = None

def __str__(self) -> str:
def repr_if_not_none(lead: str, s: str | None, trail: str = "") -> str:
def repr_if_not_none(lead: str, s: str | None, do_repr: bool = False) -> str:
if s is None:
return ""
return lead + s + trail
if do_repr:
return lead + repr(s)
return lead + s

return (
f"cancelled due to {self.source}"
+ repr_if_not_none(" with reason '", self.reason, "'")
+ repr_if_not_none(" with reason ", self.reason, True)
+ repr_if_not_none(" from task ", self.source_task)
)

def __reduce__(self) -> tuple[Callable[[], Cancelled], tuple[()]]:
# the `__reduce__` tuple does not support kwargs, so we must use partial
# for non-default args
# or switch to allow posarg (?)
return (
partial(
Cancelled._create,
Expand All @@ -111,7 +114,12 @@ def _create(
cls,
*,
source: Literal[
"deadline", "nursery", "explicit", "unknown", "KeyboardInterrupt"
"KeyboardInterrupt",
"deadline",
"explicit",
"nursery",
"shutdown",
"unknown",
],
source_task: str | None = None,
reason: str | None = None,
Expand Down
56 changes: 42 additions & 14 deletions src/trio/_core/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,14 @@ def expire(self, now: float) -> bool:

@attrs.define
class CancelReason:
# TODO: loren ipsum
source: Literal["deadline", "nursery", "explicit", "KeyboardInterrupt"]
"""Attached to a :class:`CancelScope` upon cancellation with details of the source of the
cancellation, which is then used to construct the string in a :exc:`Cancelled`.
Users can pass a ``reason`` str to :meth:`CancelScope.cancel` to set it.
"""

source: Literal[
"KeyboardInterrupt", "deadline", "explicit", "nursery", "shutdown", "unknown"
]
source_task: str | None = None
reason: str | None = None

Expand Down Expand Up @@ -580,8 +586,6 @@ class CancelScope:
_cancel_called: bool = attrs.field(default=False, init=False)
cancelled_caught: bool = attrs.field(default=False, init=False)

# necessary as cancel_status might be None
# TODO: but maybe cancel_status doesn't need it?
_cancel_reason: CancelReason | None = attrs.field(
default=None, init=False, repr=True
)
Expand Down Expand Up @@ -1230,9 +1234,10 @@ def parent_task(self) -> Task:
"(`~trio.lowlevel.Task`): The Task that opened this nursery."
return self._parent_task

def _add_exc(self, exc: BaseException) -> None:
def _add_exc(self, exc: BaseException, reason: CancelReason | None) -> None:
Copy link
Contributor

@A5rocks A5rocks Apr 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the only callers of this are internal, IMO it would be cleaner to have them set the reason inline. Also, to avoid multiple comments for similar things, why doesn't this unconditionally set _cancel_reason = reason if it isn't None?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why doesn't this unconditionally set _cancel_reason = reason if it isn't None?

in case we get multiple sources of cancellation I don't want to override the first one. In other places it's more critical, but here I could see a scenario where:

  1. Something causes a cancellation, be it a deadline or a crashing task or whatever
  2. a different task B gets cancelled, but they have an except Cancelled, and inside that handler they raise a different exception
  3. without if self.cancel_scope._cancel_reason is None: the cause would now get set to task B raising an exception

so I'm pretty sure we need the if, which means we'd need to write the if three times if we did it in-line

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not relevant anymore

Copy link
Contributor

@A5rocks A5rocks May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't looked at the code to see why this isn't relevant anymore, but I already typed up a response comment to this:

I'm not entirely convinced avoiding this is a good thing:

async def crasher():
  await trio.sleep(2)
  raise ValueError("...")

with trio.open_nursery() as nursery:
  try:
    nursery.start_soon(crasher)
    try:
      await trio.sleep(10)  # app code
    finally:
      with trio.move_on_after(2, shield=True):
        await trio.sleep(3)  # cleanup code
  except trio.Cancelled as c:
    # what should c's cancel reason be
    raise

This might matter for instance in code for shutting down stuff on exceptions, moving on after 2 seconds. The cancel reason if the clean up code ran over its 2 seconds would presumably (?) be that the exceptions happened, not that the timeout happened. I think it would make more sense if the reason was instead about the timeout.

(I haven't played with this PR yet so I'm not sure that's actually what will happen)


Would it make sense to establish some sort of causal mechanism? I.e. a field on CancelReason that points to the old CancelReason. (I guess Cancelled could store another Cancelled? But that might be bad for cycles.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah current behavior is that the crasher becomes the reason.

Storing a chain of Cancelled sounds tricky and very likely to induce gc problems. I'm pretty sure we'd have to store any raised Cancelled in the scope itself in order to be able to refer back to them.

... although the crash cancellation should be accessible somehow in the finally scope to be set as __context__. I wonder where that is getting lost

But storing a chain of reasons would be fairly straightforward and sounds like it might have some good use

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I found a repro where sys.exc_info() got cleared, but I may have been mistaken, and I don't remember the repro anymore.

But going back to your example:
I have a pretty strong intuition that the reason the nursery scope is canceled is because a child crashed. The deadline is the reason the inner scope inside the finally is canceled, but that cancellation will be swallowed by move_on_after and even in a world where we stored a chain of reasons the nursery scope would never see the deadline cancellation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point: what's the cancel reason visible inside the move_on_after then?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure it was deadline, with the child-crashing cancelled in its __context__, because of the shielding. I can add a test case for it

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice, yeah that behavior sounds nice. Returning to the earliest response you have, is that (nursery cancel -> raise a different exception in one of the tasks) the only case where things try to overwrite the cancellation reason? If so, I think it would be nicer to make nurseries not try to cancel if they are already cancelled (which would prevent the cancellation reason from being overwritten).


I also see that changing the deadline can potentially overwrite. I don't see why that would try to cancel anything if things are already cancelled... I guess just code simplicity.

I guess it makes sense to try to handle it in one place with a check on the cancellation reason, then. I just don't like it!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

haha. Yeah if we rewrote everything from scratch we might implement it differently.

But I think there's a bunch of ways to re-cancel, including simply calling cs.cancel(...) multiple times

self._pending_excs.append(exc)
# TODO: source/reason?
if self.cancel_scope._cancel_reason is None:
self.cancel_scope._cancel_reason = reason
self.cancel_scope.cancel()

def _check_nursery_closed(self) -> None:
Expand All @@ -1249,11 +1254,14 @@ def _child_finished(
) -> None:
self._children.remove(task)
if isinstance(outcome, Error):
if self.cancel_scope._cancel_reason is None:
self.cancel_scope._cancel_reason = CancelReason(
source="nursery", source_task=repr(task)
)
self._add_exc(outcome.error)
self._add_exc(
outcome.error,
CancelReason(
source="nursery",
source_task=repr(task),
reason=f"child task raised exception {outcome.error!r}",
),
)
self._check_nursery_closed()

async def _nested_child_finished(
Expand All @@ -1263,7 +1271,14 @@ async def _nested_child_finished(
# Returns ExceptionGroup instance (or any exception if the nursery is in loose mode
# and there is just one contained exception) if there are pending exceptions
if nested_child_exc is not None:
self._add_exc(nested_child_exc)
self._add_exc(
nested_child_exc,
reason=CancelReason(
source="nursery",
source_task=repr(self._parent_task),
reason=f"Code block inside nursery contextmanager raised exception {nested_child_exc!r}",
),
)
self._nested_child_running = False
self._check_nursery_closed()

Expand All @@ -1274,7 +1289,13 @@ async def _nested_child_finished(
def aborted(raise_cancel: _core.RaiseCancelT) -> Abort:
exn = capture(raise_cancel).error
if not isinstance(exn, Cancelled):
self._add_exc(exn)
self._add_exc(
exn,
CancelReason(
source="KeyboardInterrupt",
source_task=repr(self._parent_task),
),
)
# see test_cancel_scope_exit_doesnt_create_cyclic_garbage
del exn # prevent cyclic garbage creation
return Abort.FAILED
Expand All @@ -1288,7 +1309,8 @@ def aborted(raise_cancel: _core.RaiseCancelT) -> Abort:
try:
await cancel_shielded_checkpoint()
except BaseException as exc:
self._add_exc(exc)
# there's no children to cancel, so don't need to supply cancel reason
self._add_exc(exc, reason=None)

popped = self._parent_task._child_nurseries.pop()
assert popped is self
Expand Down Expand Up @@ -2134,6 +2156,12 @@ async def init(

# Main task is done; start shutting down system tasks
# TODO: source/reason?
self.system_nursery.cancel_scope._cancel_reason = CancelReason(
source="shutdown",
reason="main task done, shutting down system tasks",
source_task=repr(self.init_task),
)

self.system_nursery.cancel_scope.cancel()

# System nursery is closed; finalize remaining async generators
Expand Down
50 changes: 48 additions & 2 deletions src/trio/_core/_tests/test_cancel.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One missing test: how about a task that is started via nursery.start and raises before task_status.started()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh good catch, currently the reason becomes "Code block inside nursery contextmanager raised exception [...]" - which is quite misleading

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm, I'm not sure how to fix that other than to just make the message more generic. There's not really any way of distinguishing the two cases, and the user might handle the exception that was generated in the start() so can't add logic in there.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we catch an exception from the helper nursery and then if so, pre-emptively cancel with a better reason?

Copy link
Member Author

@jakkdl jakkdl May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only sort-of-reasonable way I can see of doing it is to save the exception that is raised from start, and then if __aexit__ has the same exception, we can infer that is the cause.
But because failing to start() isn't actually a reason for cancellation -- it can only ever be an indirect cause, if that exception causes the CM block to exit -- it's kind of weird to use it as the reason.

But even if the reason might be slightly misleading, I think the reasonable next step in debugging is for the developer to look up backtraces to find out what the exception is that killed the CM block, and they will then quickly find out what happened.

So thinking about it a bit more I'm not sure this is an issue in practice

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd vote to merge, and then revisit if anyone reports that this is an issue in practice.

Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from trio.lowlevel import current_task
from trio.testing import RaisesGroup

from .test_ki import ki_self


async def test_cancel_reason() -> None:
with trio.CancelScope() as cs:
Expand Down Expand Up @@ -42,7 +44,8 @@ async def cancelled_task(
) -> None:
task_status.started()
with pytest.raises(
Cancelled, match=rf"^cancelled due to nursery from task {fail_task!r}$"
Cancelled,
match=rf"^cancelled due to nursery with reason 'child task raised exception ValueError\(\)' from task {fail_task!r}$",
):
await trio.sleep_forever()
raise TypeError
Expand All @@ -66,7 +69,8 @@ async def cancelled_task(
) -> None:
task_status.started()
with pytest.raises(
Cancelled, match=rf"^cancelled due to nursery from task {fail_task!r}$"
Cancelled,
match=rf"^cancelled due to nursery with reason 'child task raised exception ValueError\(\)' from task {fail_task!r}$",
):
await trio.sleep_forever()
raise TypeError
Expand Down Expand Up @@ -103,3 +107,45 @@ async def test_cancel_reason_not_overwritten_2() -> None:
cs.cancel()
with pytest.raises(Cancelled, match=r"^cancelled due to deadline$"):
await trio.lowlevel.checkpoint()


async def test_nested_child_source() -> None:
    """Check the cancel reason a child sees when the nursery body raises.

    The child signals that it is running, then sleeps forever. The body of
    the ``async with`` block waits for that signal and raises ``ValueError``.
    The child must then observe a ``Cancelled`` whose message names the
    ``nursery`` source, the "Code block inside nursery contextmanager raised
    exception" reason, and the repr of the task that opened the nursery.
    """
    child_running = trio.Event()
    parent_task = current_task()
    # Anchored regex for the full ``str(Cancelled)`` the child should receive.
    expected_message = rf"^cancelled due to nursery with reason 'Code block inside nursery contextmanager raised exception ValueError\(\)' from task {parent_task!r}$"

    async def child() -> None:
        child_running.set()
        with pytest.raises(Cancelled, match=expected_message):
            await trio.sleep_forever()

    with RaisesGroup(ValueError):
        async with trio.open_nursery() as nursery:
            nursery.start_soon(child)
            # Only raise once the child is parked, so it is the one cancelled.
            await child_running.wait()
            raise ValueError


async def test_reason_delayed_ki() -> None:
    """Check the cancel reason siblings see when a KeyboardInterrupt arrives.

    Simplified version of test_ki.test_ki_protection_works check #2: two
    sleeper tasks loop on checkpoints while a KI-protected task calls
    ``ki_self()``. Each sleeper must observe a ``Cancelled`` whose message is
    "cancelled due to KeyboardInterrupt from task <repr of the parent task>",
    and the nursery must raise a group containing ``KeyboardInterrupt``.
    """
    parent_task = current_task()
    # Anchored regex for the full ``str(Cancelled)`` each sleeper should receive.
    expected_message = (
        rf"^cancelled due to KeyboardInterrupt from task {parent_task!r}$"
    )

    async def sleeper(name: str) -> None:
        with pytest.raises(Cancelled, match=expected_message):
            while True:
                await trio.lowlevel.checkpoint()

    async def raiser(name: str) -> None:
        ki_self()

    protected_raiser = trio.lowlevel.enable_ki_protection(raiser)

    with RaisesGroup(KeyboardInterrupt):
        async with trio.open_nursery() as nursery:
            for sleeper_name in ("s1", "s2"):
                nursery.start_soon(sleeper, sleeper_name)
            nursery.start_soon(protected_raiser, "r1")
            # __aexit__ blocks, and then receives the KI
3 changes: 1 addition & 2 deletions src/trio/_subprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,8 +766,7 @@ async def killer() -> None:

nursery.start_soon(killer)
await proc.wait()
# TODO: source/reason?
killer_cscope.cancel()
killer_cscope.cancel(reason="trio internal implementation detail")
raise

stdout = b"".join(stdout_chunks) if capture_stdout else None
Expand Down
Loading