Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9fc3169
Elide suspension points via [[clang::coro_await_suspend_destroy]]
snarkmaster Aug 7, 2025
eb5557a
Fix CI
snarkmaster Aug 8, 2025
5d6a06d
Improve doc formatting
snarkmaster Aug 8, 2025
9fe0b17
Merge branch 'main' into coro_await_suspend_destroy
snarkmaster Aug 8, 2025
1dabfe6
Rework the AttrDocs.td addition based on feedback
snarkmaster Aug 9, 2025
62789ef
Split out the `libcxx/test` change into PR #152820
snarkmaster Aug 9, 2025
4835f37
Lift standard suspend flow to emitStandardAwaitSuspend; tweak comment.
snarkmaster Aug 9, 2025
3e84df1
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 9, 2025
b359f5f
Merge branch 'coro_await_suspend_destroy' of github.com:snarkmaster/l…
snarkmaster Aug 9, 2025
4b7707d
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 17, 2025
99703af
Elide suspension points via [[clang::coro_await_suspend_destroy]]
snarkmaster Aug 7, 2025
8bf453e
Fix CI
snarkmaster Aug 8, 2025
811501d
Improve doc formatting
snarkmaster Aug 8, 2025
63cf306
Rework the AttrDocs.td addition based on feedback
snarkmaster Aug 9, 2025
2b2748c
Split out the `libcxx/test` change into PR #152820
snarkmaster Aug 9, 2025
cf26f6b
Lift standard suspend flow to emitStandardAwaitSuspend; tweak comment.
snarkmaster Aug 9, 2025
f1e885c
Improvements in response to comments
snarkmaster Aug 17, 2025
4f4e815
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 19, 2025
2b3a9b7
Merge branch 'coro_await_suspend_destroy' of github.com:snarkmaster/l…
snarkmaster Aug 19, 2025
72274d2
Fix bad merge & some doc backticks
snarkmaster Aug 19, 2025
543bf07
Remove another leftover file from bad merge
snarkmaster Aug 19, 2025
a09b1f8
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 19, 2025
e6b6367
Address 2 more comments
snarkmaster Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,14 @@ Removed Compiler Flags
Attribute Changes in Clang
--------------------------

- Introduced a new attribute ``[[clang::coro_await_suspend_destroy]]``. When
applied to an ``await_suspend(std::coroutine_handle<Promise>)`` member of a
coroutine awaiter, it causes suspensions into this awaiter to use a new
``await_suspend_destroy(Promise&)`` method. The coroutine is then immediately
destroyed. This flow bypasses the original ``await_suspend()`` (though it
must contain a compatibility stub), and omits suspend intrinsics. The net
effect is improved code speed & size for "short-circuiting" coroutines.

Improvements to Clang's diagnostics
-----------------------------------
- Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic
Expand Down
11 changes: 11 additions & 0 deletions clang/include/clang/AST/ExprCXX.h
Original file line number Diff line number Diff line change
Expand Up @@ -5266,6 +5266,7 @@ class CoroutineSuspendExpr : public Expr {
: Expr(SC, Resume->getType(), Resume->getValueKind(),
Resume->getObjectKind()),
KeywordLoc(KeywordLoc), OpaqueValue(OpaqueValue) {
CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
SubExprs[SubExpr::Operand] = Operand;
SubExprs[SubExpr::Common] = Common;
SubExprs[SubExpr::Ready] = Ready;
Expand All @@ -5279,6 +5280,7 @@ class CoroutineSuspendExpr : public Expr {
: Expr(SC, Ty, VK_PRValue, OK_Ordinary), KeywordLoc(KeywordLoc) {
assert(Common->isTypeDependent() && Ty->isDependentType() &&
"wrong constructor for non-dependent co_await/co_yield expression");
CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
SubExprs[SubExpr::Operand] = Operand;
SubExprs[SubExpr::Common] = Common;
SubExprs[SubExpr::Ready] = nullptr;
Expand All @@ -5288,13 +5290,22 @@ class CoroutineSuspendExpr : public Expr {
}

/// Builds an empty shell: the UseAwaitSuspendDestroy bit is cleared and every
/// sub-expression slot (Operand, Common, Ready, Suspend, Resume) is null.
CoroutineSuspendExpr(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) {
CoroutineSuspendExprBits.UseAwaitSuspendDestroy = false;
SubExprs[SubExpr::Operand] = nullptr;
SubExprs[SubExpr::Common] = nullptr;
SubExprs[SubExpr::Ready] = nullptr;
SubExprs[SubExpr::Suspend] = nullptr;
SubExprs[SubExpr::Resume] = nullptr;
}

/// Returns the UseAwaitSuspendDestroy bit stored on this expression.
/// NOTE(review): presumably true when the selected `await_suspend` overload is
/// annotated with `[[clang::coro_await_suspend_destroy]]` -- confirm in Sema.
bool useAwaitSuspendDestroy() const {
return CoroutineSuspendExprBits.UseAwaitSuspendDestroy;
}

/// Sets the UseAwaitSuspendDestroy bit on this expression.
/// \param Use new value of the bit; defaults to true.
void setUseAwaitSuspendDestroy(bool Use = true) {
CoroutineSuspendExprBits.UseAwaitSuspendDestroy = Use;
}

/// Returns the sub-expression held in the Common slot, cast to Expr.
Expr *getCommonExpr() const {
return static_cast<Expr*>(SubExprs[SubExpr::Common]);
}
Expand Down
16 changes: 14 additions & 2 deletions clang/include/clang/AST/Stmt.h
Original file line number Diff line number Diff line change
Expand Up @@ -1258,12 +1258,23 @@ class alignas(void *) Stmt {

//===--- C++ Coroutines bitfields classes ---===//

class CoawaitExprBitfields {
friend class CoawaitExpr;
// Bitfields owned by CoroutineSuspendExpr. The leading unnamed field skips
// the NumExprBits bits that belong to ExprBitfields.
class CoroutineSuspendExprBitfields {
friend class CoroutineSuspendExpr;

LLVM_PREFERRED_TYPE(ExprBitfields)
unsigned : NumExprBits;

// Read and written through CoroutineSuspendExpr::useAwaitSuspendDestroy()
// and CoroutineSuspendExpr::setUseAwaitSuspendDestroy().
LLVM_PREFERRED_TYPE(bool)
unsigned UseAwaitSuspendDestroy : 1;
};
// Bit count consumed up to here: the Expr bits plus the single flag above.
enum { NumCoroutineSuspendExprBits = NumExprBits + 1 };

// Bitfields owned by CoawaitExpr. The leading unnamed field skips the
// NumCoroutineSuspendExprBits bits used by CoroutineSuspendExprBitfields, so
// IsImplicit does not overlap UseAwaitSuspendDestroy.
class CoawaitExprBitfields {
friend class CoawaitExpr;

LLVM_PREFERRED_TYPE(CoroutineSuspendExprBitfields)
unsigned : NumCoroutineSuspendExprBits;

LLVM_PREFERRED_TYPE(bool)
unsigned IsImplicit : 1;
};
Expand Down Expand Up @@ -1388,6 +1399,7 @@ class alignas(void *) Stmt {
PackIndexingExprBitfields PackIndexingExprBits;

// C++ Coroutines expressions
CoroutineSuspendExprBitfields CoroutineSuspendExprBits;
CoawaitExprBitfields CoawaitBits;

// Obj-C Expressions
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
let SimpleHandler = 1;
}

// `[[clang::coro_await_suspend_destroy]]`: a C++-only attribute whose subject
// is a C++ method; documented by CoroAwaitSuspendDestroyDoc.
def CoroAwaitSuspendDestroy: InheritableAttr {
let Spellings = [Clang<"coro_await_suspend_destroy">];
let Subjects = SubjectList<[CXXMethod]>;
let LangOpts = [CPlusPlus];
let Documentation = [CoroAwaitSuspendDestroyDoc];
let SimpleHandler = 1;
}

// OSObject-based attributes.
def OSConsumed : InheritableParamAttr {
let Spellings = [Clang<"os_consumed">];
Expand Down
120 changes: 120 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -9363,6 +9363,126 @@ Example:
}];
}

def CoroAwaitSuspendDestroyDoc : Documentation {
let Category = DocCatFunction;
let Content = [{

The ``[[clang::coro_await_suspend_destroy]]`` attribute applies to an
``await_suspend(std::coroutine_handle<Promise>)`` member function of a
coroutine awaiter. When applied, suspensions into the awaiter use an optimized
call path that bypasses standard suspend intrinsics, and immediately destroys
the suspending coro.

Instead of calling the annotated ``await_suspend()``, the coroutine calls
``await_suspend_destroy(Promise&)`` and immediately destroys the coroutine.

Although it is not called, it is strongly recommended that ``await_suspend()``
contain the following portability stub. The stub ensures the awaiter behaves
equivalently without ``coro_await_suspend_destroy`` support, and makes the
control flow clear to readers unfamiliar with the attribute:

.. code-block:: c++

void await_suspend_destroy(Promise&) { /* actual implementation */ }
[[clang::coro_await_suspend_destroy]]
void await_suspend(std::coroutine_handle<Promise> handle) {
// Stub to preserve behavior when the attribute is not supported
await_suspend_destroy(handle.promise());
handle.destroy();
}
Comment on lines +9376 to +9392
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still confusing. The behavior I have in mind is that the coroutine is immediately destroyed after the annotated await_suspend finishes. In other words, the user can think of it as the compiler inserting an unconditional .destroy() after the annotated await_suspend. The existence of await_suspend_destroy here is super confusing.

Copy link
Author

@snarkmaster snarkmaster Aug 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, sounds like I need to make it very explicit. Let me put my lawyer hat on! Let me know if the following is good enough, to replace the lines you highlighted?


Here is the formal contract for this attribute.

The attribute is considered active when both of these are true:

  • The compiler supports it -- i.e. the macro __has_cpp_attribute(clang::coro_await_suspend_destroy) expands to a nonzero integer.
  • The await_suspend overload applicable to the current coroutine's promise type is annotated with [[clang::coro_await_suspend_destroy]].

If the attribute is not active, then the compiler will follow the C++ standard suspension behavior. When await_ready() returns false:

  • First, the coroutine is suspended -- the compiler saves the coroutine state and creates a handle.
  • Then, await_suspend is invoked with the handle.
  • Note: Without an active attribute, await_suspend_destroy(Promise&) may be defined, but is not part of the compiler's protocol.

If the attribute is active, the compiler will follow this non-standard protocol whenever await_ready() returns false:

  • First, await_suspend_destroy is invoked with a mutable reference to the awaiting coroutine's promise.
  • Then, the coroutine is immediately destroyed, as if on co_return ...; but without invoking either return_void() or return_value().
  • Notes:
    • The coroutine is not suspended, and a handle is not created.
    • The applicable await_suspend is not called. It must still be declared, since the compiler looks for the attribute on this special member, but a definition is optional. NB: Before providing a definition, read the note on portability below.

Portability note: It is strongly recommended to write your code in a way that does not rely on support for this attribute. Fortunately, the attribute's contract is designed so that portability does not require conditional compilation.

Suppose you have the following standard await_suspend:

void await_suspend(std::coroutine_handle<MyPromise> h) {
  record_suspension_via_promise(h.promise());
  h.destroy();
}

Without loss of portability, you can replace it by await_suspend_destroy, plus a fallback await_suspend. Depending on the compiler, either one may be the entry point, but the behavior will be the same -- except for the speed, size, and allocation-elision benefits of the attribute.

// Entry point when `clang::coro_await_suspend_destroy` is supported
void await_suspend_destroy(MyPromise& p) {
  record_suspension_via_promise(p);
}
// Entry point when `clang::coro_await_suspend_destroy` is not supported.
// Emits no code when `clang::coro_await_suspend_destroy` is supported.
[[clang::coro_await_suspend_destroy]]
void await_suspend(std::coroutine_handle<MyPromise> h) {
  await_suspend_destroy(h.promise());
  h.destroy();
}

The "standard" and "replacement" forms are equivalent because the fallback await_suspend replicates the attribute's contract whenever the attribute is not supported by the compiler.

Warning: Even if you only use Clang, do not neglect to add the portability stub -- LLVM reserves the right to remove support for this attribute in a later major release.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is clear now but may be too verbose. But I get your point.


An awaiter type may provide both annotated and non-annotated overloads of
``await_suspend()``, as long as each invocation of an annotated overload has a
corresponding ``await_suspend_destroy(Promise&)`` overload.

The return type of ``await_suspend()`` must match that of
``await_suspend_destroy()``, and the latter must return ``void``. (Note: if
desired, it would be straightforward to also support the "symmetric transfer"
``std::coroutine_handle`` return type.)

This optimization improves code speed and size for "short-circuiting"
coroutines — those that use coroutine syntax **exclusively** for early returns
and control flow rather than true asynchronous operations.

Specifically, a short-circuiting awaiter is one that either proceeds
immediately (``await_ready()`` returns ``true``, skipping to
``await_resume()``) or terminates the coroutine execution.

Then, a short-circuiting coroutine is one where **all** the awaiters (including
``co_await``, ``co_yield``, initial, and final suspend) are short-circuiting.

The short-circuiting coroutine concept introduced above has close analogs in
other languages:

- Rust has ``Result<T>`` and a ``?`` operator to unpack it, while
``folly::result<T>`` is a C++ short-circuiting coroutine, within which
``co_await or_unwind(someResult())`` acts just like ``someResult()?``.

- Haskell has ``Maybe`` & ``Error`` monads. A short-circuiting ``co_await``
loosely corresponds to the monadic ``>>=``, whereas a short-circuiting
``std::optional`` coro would be an exact analog of ``Maybe``.

Returning to C++, even non-short-circuiting coroutines, including asynchronous
ones that suspend, may contain short-circuiting awaiters, and those might still
see some performance benefit if annotated.

Marking your ``await_suspend_destroy`` as ``noexcept`` can sometimes further
improve optimization.

Beyond these per-awaiter gains, if **all** awaiters within a coroutine are
short-circuiting, then the coro frame **can reliably be allocated on-stack**,
making short-circuiting coros behave qualitatively more like plain functions --
with better optimization & more predictable behavior under memory pressure.

Technical aside: Heap elision becomes reliable because LLVM is allowed to elide
heap allocations whenever it can prove that the handle doesn't "escape" from
the coroutine. User code can only access the handle via suspend intrinsics,
and annotated short-circuiting awaiters simply don't use any.

Note that a short-circuiting coroutine differs in one important way from a
function that replaced each ``co_await awaiter`` with explicit control flow:

.. code-block:: c++

T value;
if (awaiter.await_ready()) {
value = awaiter.await_resume();
} else {
// ... content of `await_suspend_destroy` ...
return /* early-termination return object */;
}

The key difference is that ``unhandled_exception()`` lets the promise type
wrap the function body in an implicit try-catch. This automatic exception
boundary behavior can be desirable in robust, return-value-oriented programs
that benefit from short-circuiting coroutines. If not, the promise can
re-throw.

Here is an example of a short-circuiting awaiter for a hypothetical
``std::optional`` coroutine:

.. code-block:: c++

template <typename T>
struct optional_awaiter {
std::optional<T> opt_;
bool await_ready() const noexcept { return opt_.has_value(); }
T await_resume() { return std::move(opt_).value(); }
void await_suspend_destroy(auto& promise) {
// The return object of `promise`'s coro should default to "empty".
assert(!promise.returned_optional_ptr_->has_value());
}
[[clang::coro_await_suspend_destroy]]
void await_suspend(auto handle) {
// Fallback for when `coro_await_suspend_destroy` is unavailable.
await_suspend_destroy(handle.promise());
handle.destroy();
}
};

}];
}

def CountedByDocs : Documentation {
let Category = DocCatField;
let Content = [{
Expand Down
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12513,6 +12513,12 @@ def note_coroutine_promise_call_implicitly_required : Note<
def err_await_suspend_invalid_return_type : Error<
"return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
>;
// Reported when 'await_suspend_destroy' does not return 'void'; %0 is the
// return type that was found.
def err_await_suspend_destroy_invalid_return_type : Error<
"return type of 'await_suspend_destroy' is required to be 'void' (have %0)"
>;
// Reported when the return types of 'await_suspend' (%1) and
// 'await_suspend_destroy' (%0) differ.
def err_await_suspend_suspend_destroy_return_type_mismatch : Error<
"return type of 'await_suspend' (%1) must match return type of 'await_suspend_destroy' (%0)"
>;
def note_await_ready_no_bool_conversion : Note<
"return type of 'await_ready' is required to be contextually convertible to 'bool'"
>;
Expand Down
Loading
Loading