Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/trio/_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,7 +547,7 @@ async def context_manager(
yield wrapped_recv_chan
# User has exited context manager, cancel to immediately close the
# abandoned generator if it's still alive.
nursery.cancel_scope.cancel()
nursery.cancel_scope.cancel(reason="user exited context manager")
except BaseExceptionGroup as eg:
try:
raise_single_exception_from_group(eg)
Expand Down
4 changes: 3 additions & 1 deletion src/trio/_core/_asyncgens.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,9 @@ async def _finalize_one(
# with an exception, not even a Cancelled. The inside
# is cancelled so there's no deadlock risk.
with _core.CancelScope(shield=True) as cancel_scope:
cancel_scope.cancel()
cancel_scope.cancel(
reason="internal trio cancellation during finalization"
)
await agen.aclose()
except BaseException:
ASYNCGEN_LOGGER.exception(
Expand Down
50 changes: 47 additions & 3 deletions src/trio/_core/_exceptions.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from functools import partial
from typing import TYPE_CHECKING, Literal

import attrs

from trio._util import NoPublicConstructor, final

if TYPE_CHECKING:
from collections.abc import Callable

from typing_extensions import Self


class TrioInternalError(Exception):
"""Raised by :func:`run` if we encounter a bug in Trio, or (possibly) a
Expand Down Expand Up @@ -34,6 +39,7 @@ class WouldBlock(Exception):


@final
@attrs.define(eq=False, kw_only=True)
class Cancelled(BaseException, metaclass=NoPublicConstructor):
"""Raised by blocking calls if the surrounding scope has been cancelled.

Expand Down Expand Up @@ -67,11 +73,49 @@ class Cancelled(BaseException, metaclass=NoPublicConstructor):

"""

source: Literal["deadline", "nursery", "explicit", "unknown", "KeyboardInterrupt"]
# TODO: this should probably be a Task?
source_task: str | None = None
Copy link
Contributor

@A5rocks A5rocks Apr 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure repr(Task) is actually that useful? Like yes, it says what function this was started in and maybe you can locate the task through some gc machinery through the id, but...

Maybe a weakref would be more useful so people can access attributes? I feel like "where was I spawned" is already answered by the stack trace. (nevermind, just remembered this is the canceller not the cancellee)


Nevermind, I didn't think this suggestion through. A weakref wouldn't work for the common case (a task cancelling a sibling task).

I'm not convinced a strong ref here would be bad -- a Task doesn't store the exception or the cancellation reason so there's no reference cycle I think? But a string here is fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if a Task contains a CancelScope and the scope gets cancelled within the same task, the scope will then have a strong ref to a CancelReason which will then point back to the Task. I think?

the repr(Task) on its own is perhaps not super useful, but in case you have multiple cancellations going on at the same time that are only distinguished by the source task then you can visually distinguish them even without other sources of the task id.
Though it does also contain the name of the function itself:
<Task 'trio._core._tests.test_run.test_Cancelled_str' at 0x\w*>
which could be very helpful if you have different functions spawned in a nursery.

Copy link
Contributor

@A5rocks A5rocks May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, I guess task -> parent/child nursery -> cancel scope -> cancel reason -> task is a loop, yeah. That's annoying. (Or maybe CoroutineType stores frames as a strongref? That too.)

reason: str | None = None

def __str__(self) -> str:
return "Cancelled"
def repr_if_not_none(lead: str, s: str | None, trail: str = "") -> str:
if s is None:
return ""
return lead + s + trail

return (
f"cancelled due to {self.source}"
+ repr_if_not_none(" with reason '", self.reason, "'")
+ repr_if_not_none(" from task ", self.source_task)
)

def __reduce__(self) -> tuple[Callable[[], Cancelled], tuple[()]]:
return (Cancelled._create, ())
# the `__reduce__` tuple does not support kwargs, so we must use partial
# for non-default args
# or switch to allow posarg (?)
return (
partial(
Cancelled._create,
source=self.source,
source_task=self.source_task,
reason=self.reason,
),
(),
)

if TYPE_CHECKING:
# for type checking on internal code
@classmethod
def _create(
cls,
*,
source: Literal[
"deadline", "nursery", "explicit", "unknown", "KeyboardInterrupt"
],
source_task: str | None = None,
reason: str | None = None,
) -> Self: ...


class BusyResourceError(Exception):
Expand Down
77 changes: 74 additions & 3 deletions src/trio/_core/_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
TYPE_CHECKING,
Any,
Final,
Literal,
NoReturn,
Protocol,
cast,
Expand Down Expand Up @@ -305,6 +306,7 @@ def expire(self, now: float) -> bool:
did_something = True
# This implicitly calls self.remove(), so we don't need to
# decrement _active here
cancel_scope._cancel_reason = CancelReason(source="deadline")
cancel_scope.cancel()
# If we've accumulated too many stale entries, then prune the heap to
# keep it under control. (We only do this occasionally in a batch, to
Expand All @@ -314,6 +316,14 @@ def expire(self, now: float) -> bool:
return did_something


@attrs.define
class CancelReason:
# TODO: loren ipsum
source: Literal["deadline", "nursery", "explicit", "KeyboardInterrupt"]
source_task: str | None = None
reason: str | None = None


@attrs.define(eq=False)
class CancelStatus:
"""Tracks the cancellation status for a contiguous extent
Expand Down Expand Up @@ -468,10 +478,22 @@ def recalculate(self) -> None:
or current.parent_cancellation_is_visible_to_us
)
if new_state != current.effectively_cancelled:
if (
current._scope._cancel_reason is None
and current.parent_cancellation_is_visible_to_us
):
assert current._parent is not None
current._scope._cancel_reason = (
current._parent._scope._cancel_reason
)
current.effectively_cancelled = new_state
if new_state:
for task in current._tasks:
task._attempt_delivery_of_any_pending_cancel()
# current._cancel_reason will not be None, unless there's misnesting
for child in current._children:
if child._scope._cancel_reason is None:
child._scope._cancel_reason = current._scope._cancel_reason
todo.extend(current._children)

def _mark_abandoned(self) -> None:
Expand Down Expand Up @@ -558,6 +580,12 @@ class CancelScope:
_cancel_called: bool = attrs.field(default=False, init=False)
cancelled_caught: bool = attrs.field(default=False, init=False)

# necessary as cancel_status might be None
# TODO: but maybe cancel_status doesn't need it?
_cancel_reason: CancelReason | None = attrs.field(
default=None, init=False, repr=True
)

# Constructor arguments:
_relative_deadline: float = attrs.field(
default=inf,
Expand Down Expand Up @@ -594,6 +622,7 @@ def __enter__(self) -> Self:
self._relative_deadline = inf

if current_time() >= self._deadline:
self._cancel_reason = CancelReason(source="deadline")
self.cancel()
with self._might_change_registered_deadline():
self._cancel_status = CancelStatus(scope=self, parent=task._cancel_status)
Expand Down Expand Up @@ -883,7 +912,7 @@ def shield(self, new_value: bool) -> None:
self._cancel_status.recalculate()

@enable_ki_protection
def cancel(self) -> None:
def cancel(self, *, reason: str | None = None) -> None:
"""Cancels this scope immediately.

This method is idempotent, i.e., if the scope was already
Expand All @@ -893,6 +922,14 @@ def cancel(self) -> None:
return
with self._might_change_registered_deadline():
self._cancel_called = True
if self._cancel_reason is None:
try:
current_task = repr(_core.current_task())
except RuntimeError:
current_task = None
self._cancel_reason = CancelReason(
reason=reason, source="explicit", source_task=current_task
)
if self._cancel_status is not None:
self._cancel_status.recalculate()

Expand Down Expand Up @@ -924,6 +961,7 @@ def cancel_called(self) -> bool:
# but it makes the value returned by cancel_called more
# closely match expectations.
if not self._cancel_called and current_time() >= self._deadline:
self._cancel_reason = CancelReason(source="deadline")
self.cancel()
return self._cancel_called

Expand Down Expand Up @@ -1194,6 +1232,7 @@ def parent_task(self) -> Task:

def _add_exc(self, exc: BaseException) -> None:
self._pending_excs.append(exc)
# TODO: source/reason?
self.cancel_scope.cancel()

def _check_nursery_closed(self) -> None:
Expand All @@ -1210,6 +1249,10 @@ def _child_finished(
) -> None:
self._children.remove(task)
if isinstance(outcome, Error):
if self.cancel_scope._cancel_reason is None:
self.cancel_scope._cancel_reason = CancelReason(
source="nursery", source_task=repr(task)
)
self._add_exc(outcome.error)
self._check_nursery_closed()

Expand Down Expand Up @@ -1575,8 +1618,23 @@ def _attempt_delivery_of_any_pending_cancel(self) -> None:
if not self._cancel_status.effectively_cancelled:
return

# FIXME: I haven't quite figured out how to get the cancel reason to stay
# alive for passing into other threads, but also not cause any cyclic garbage.
if (
self._cancel_status is None
or self._cancel_status._scope._cancel_reason is None
):
# _cancel_status._cancel_reason is None when misnesting
cancelled = Cancelled._create(source="unknown", reason="misnesting")
else:
cancelled = Cancelled._create(
source=self._cancel_status._scope._cancel_reason.source,
reason=self._cancel_status._scope._cancel_reason.reason,
source_task=self._cancel_status._scope._cancel_reason.source_task,
)

def raise_cancel() -> NoReturn:
raise Cancelled._create()
raise cancelled

self._attempt_abort(raise_cancel)

Expand Down Expand Up @@ -2075,6 +2133,7 @@ async def init(
)

# Main task is done; start shutting down system tasks
# TODO: source/reason?
self.system_nursery.cancel_scope.cancel()

# System nursery is closed; finalize remaining async generators
Expand All @@ -2083,6 +2142,7 @@ async def init(
# There are no more asyncgens, which means no more user-provided
# code except possibly run_sync_soon callbacks. It's finally safe
# to stop the run_sync_soon task and exit run().
# TODO: source/reason?
run_sync_soon_nursery.cancel_scope.cancel()

################
Expand Down Expand Up @@ -2926,7 +2986,18 @@ async def checkpoint() -> None:
if task._cancel_status.effectively_cancelled or (
task is task._runner.main_task and task._runner.ki_pending
):
with CancelScope(deadline=-inf):
cs = CancelScope(deadline=inf)
if (
task._cancel_status._scope._cancel_reason is None
and task is task._runner.main_task
and task._runner.ki_pending
):
task._cancel_status._scope._cancel_reason = CancelReason(
source="KeyboardInterrupt"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's kind of strange to me to set this here. Is there no way for the thing raising KeyboardInterrupt to set this cancel reason?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it's a super ugly place.

KeyboardInterrupt is raised in two places: in Task._attempt_delivery_of_pending_ki where we ... probably could access self._parent_nursery.cancel_scope to set the reason. But the other one is in _ki.py KIManager.install() and that one can't see shit. But if we want it to be analogous to Cancelled that one doesn't set the reason on raising, it sets it when the scope is cancelled - and Cancelled is only raised later when a checkpoint is actually hit. And #3233 would have to throw that away

Though the current logic doesn't make sense in that regard either, the scope has already effectively been canceled, but it's not until we checkpoint that we set the reason to KeyboardInterrupt.

I'd love to place it in Runner.deliver_ki, which is the one that actually sets ki_pending = True... but navigating from there to the revelant scope and set a reason is non-trivial.

With the current setup there's even a case for saying we shouldn't bother setting a reason at all, because the scope hasn't actually been canceled. And during KI we don't raise Cancelled, so we don't have a consumer for the CancelReason. You can see it through introspection.. but that seems like a big stretch during KI.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my current error when trying to do it in deliver_ki is that setting self.main_task._cancel_status._scope._cancel_reason errors because _cancel_status can be None... which confuses me because there's a comment saying it can only be None in the init task.. but I'm explicitly accessing main_task :S

But I'm starting to lean towards not setting the reason, though #3233 would have to revisit that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, we can just leave the status quo and fix it in #3233 unless we expect the PRs to be in separate releases

)
assert task._cancel_status._scope._cancel_reason is not None
cs._cancel_reason = task._cancel_status._scope._cancel_reason
with cs:
await _core.wait_task_rescheduled(lambda _: _core.Abort.SUCCEEDED)


Expand Down
105 changes: 105 additions & 0 deletions src/trio/_core/_tests/test_cancel.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One missing test: how about a task that is stated via nursery.start and raises before task_status.started()?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh good catch, currently the reason becomes "Code block inside nursery contextmanager raised exception [...]" - which is quite misleading

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hm, I'm not sure how to fix that other than to just make the message more generic. There's not really any way of distinguishing the two cases, and the user might handle the exception that was generated in the start() so can't add logic in there.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we catch an exception from the helper nursery and then if so, pre-emptively cancel with a better reason?

Copy link
Member Author

@jakkdl jakkdl May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only sort-of-reasonable way I can see of doing it is to save the exception that is raised from start, and then if __aexit__ has the same exception, we can infer that is the cause.
But because failing to start() isn't actually a reason for cancellation, it can only ever be an indirect cause if that exception causes the CM block to exit, it's kind of weird to use it as the reason.

But even if the reason might be slightly misleading, I think the reasonable next step in debugging is for the developer to look up backtraces to find out what the exception is that killed the CM block, and they will then quickly find out what happened.

So thinking about it a bit more I'm not sure this is an issue in practice

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd vote to merge, and then revisit if anyone reports that this is an issue in practice.

Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# there's tons of cancellation testing in test_run, but that file is 3k lines long already...

from math import inf

import pytest

import trio
from trio import Cancelled
from trio.lowlevel import current_task
from trio.testing import RaisesGroup


async def test_cancel_reason() -> None:
with trio.CancelScope() as cs:
cs.cancel(reason="hello")
with pytest.raises(
Cancelled,
match=rf"^cancelled due to explicit with reason 'hello' from task {current_task()!r}$",
):
await trio.lowlevel.checkpoint()

with trio.CancelScope(deadline=-inf) as cs:
with pytest.raises(Cancelled, match=r"^cancelled due to deadline"):
await trio.lowlevel.checkpoint()

with trio.CancelScope() as cs:
cs.deadline = -inf
with pytest.raises(
Cancelled,
match=r"^cancelled due to deadline",
):
await trio.lowlevel.checkpoint()


async def test_cancel_reason_nursery() -> None:
async def failing_task(task_status: trio.TaskStatus[trio.lowlevel.Task]) -> None:
task_status.started(current_task())
raise ValueError

async def cancelled_task(
fail_task: trio.lowlevel.Task, task_status: trio.TaskStatus
) -> None:
task_status.started()
with pytest.raises(
Cancelled, match=rf"^cancelled due to nursery from task {fail_task!r}$"
):
await trio.sleep_forever()
raise TypeError

with RaisesGroup(ValueError, TypeError):
async with trio.open_nursery() as nursery:
fail_task = await nursery.start(failing_task)
await nursery.start(cancelled_task, fail_task)


async def test_cancel_reason_nursery2() -> None:
async def failing_task(
event: trio.Event, task_status: trio.TaskStatus[trio.lowlevel.Task]
) -> None:
task_status.started(current_task())
await event.wait()
raise ValueError

async def cancelled_task(
fail_task: trio.lowlevel.Task, task_status: trio.TaskStatus
) -> None:
task_status.started()
with pytest.raises(
Cancelled, match=rf"^cancelled due to nursery from task {fail_task!r}$"
):
await trio.sleep_forever()
raise TypeError

with RaisesGroup(ValueError, TypeError):
async with trio.open_nursery() as nursery:
event = trio.Event()
fail_task = await nursery.start(failing_task, event)
await nursery.start(cancelled_task, fail_task)
event.set()


async def test_cancel_reason_not_overwritten() -> None:
with trio.CancelScope() as cs:
cs.cancel()
with pytest.raises(
Cancelled,
match=rf"^cancelled due to explicit from task {current_task()!r}$",
):
await trio.lowlevel.checkpoint()
cs.deadline = -inf
with pytest.raises(
Cancelled,
match=rf"^cancelled due to explicit from task {current_task()!r}$",
):
await trio.lowlevel.checkpoint()


async def test_cancel_reason_not_overwritten_2() -> None:
with trio.CancelScope() as cs:
cs.deadline = -inf
with pytest.raises(Cancelled, match=r"^cancelled due to deadline$"):
await trio.lowlevel.checkpoint()
cs.cancel()
with pytest.raises(Cancelled, match=r"^cancelled due to deadline$"):
await trio.lowlevel.checkpoint()
Loading
Loading