Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
9fc3169
Elide suspension points via [[clang::coro_await_suspend_destroy]]
snarkmaster Aug 7, 2025
eb5557a
Fix CI
snarkmaster Aug 8, 2025
5d6a06d
Improve doc formatting
snarkmaster Aug 8, 2025
9fe0b17
Merge branch 'main' into coro_await_suspend_destroy
snarkmaster Aug 8, 2025
1dabfe6
Rework the AttrDocs.td addition based on feedback
snarkmaster Aug 9, 2025
62789ef
Split out the `libcxx/test` change into PR #152820
snarkmaster Aug 9, 2025
4835f37
Lift standard suspend flow to emitStandardAwaitSuspend; tweak comment.
snarkmaster Aug 9, 2025
3e84df1
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 9, 2025
b359f5f
Merge branch 'coro_await_suspend_destroy' of github.com:snarkmaster/l…
snarkmaster Aug 9, 2025
4b7707d
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 17, 2025
99703af
Elide suspension points via [[clang::coro_await_suspend_destroy]]
snarkmaster Aug 7, 2025
8bf453e
Fix CI
snarkmaster Aug 8, 2025
811501d
Improve doc formatting
snarkmaster Aug 8, 2025
63cf306
Rework the AttrDocs.td addition based on feedback
snarkmaster Aug 9, 2025
2b2748c
Split out the `libcxx/test` change into PR #152820
snarkmaster Aug 9, 2025
cf26f6b
Lift standard suspend flow to emitStandardAwaitSuspend; tweak comment.
snarkmaster Aug 9, 2025
f1e885c
Improvements in response to comments
snarkmaster Aug 17, 2025
4f4e815
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 19, 2025
2b3a9b7
Merge branch 'coro_await_suspend_destroy' of github.com:snarkmaster/l…
snarkmaster Aug 19, 2025
72274d2
Fix bad merge & some doc backticks
snarkmaster Aug 19, 2025
543bf07
Remove another leftover file from bad merge
snarkmaster Aug 19, 2025
a09b1f8
Merge branch 'llvm:main' into coro_await_suspend_destroy
snarkmaster Aug 19, 2025
e6b6367
Address 2 more comments
snarkmaster Aug 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions clang/docs/ReleaseNotes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ Removed Compiler Flags
Attribute Changes in Clang
--------------------------

- Introduced a new attribute ``[[clang::coro_await_suspend_destroy]]``. When
applied to a coroutine awaiter class, it causes suspensions into this awaiter
to use a new ``await_suspend_destroy(Promise&)`` method instead of the standard
``await_suspend(std::coroutine_handle<...>)``. The coroutine is then destroyed.
This improves code speed & size for "short-circuiting" coroutines.

Improvements to Clang's diagnostics
-----------------------------------
- Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic
Expand Down
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
let SimpleHandler = 1;
}

// `[[clang::coro_await_suspend_destroy]]`: a C++-only attribute that applies to
// class/struct declarations (coroutine awaiter types). It carries no arguments,
// so the generated simple handler is sufficient.
def CoroAwaitSuspendDestroy : InheritableAttr {
let Spellings = [Clang<"coro_await_suspend_destroy">];
let Subjects = SubjectList<[CXXRecord]>;
let LangOpts = [CPlusPlus];
let Documentation = [CoroAwaitSuspendDestroyDoc];
let SimpleHandler = 1;
}

// OSObject-based attributes.
def OSConsumed : InheritableParamAttr {
let Spellings = [Clang<"os_consumed">];
Expand Down
104 changes: 104 additions & 0 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -9270,6 +9270,110 @@ Example:
}];
}

def CoroAwaitSuspendDestroyDoc : Documentation {
let Category = DocCatDecl;
let Content = [{

The ``[[clang::coro_await_suspend_destroy]]`` attribute may be applied to a C++
coroutine awaiter type. When this attribute is present, the awaiter must
implement ``void await_suspend_destroy(Promise&)``. If ``await_ready()``
returns ``false`` at a suspension point, ``await_suspend_destroy`` will be
called directly. The coroutine being suspended will then be immediately
destroyed.

The new behavior is equivalent to this standard code:

.. code-block:: c++

void await_suspend_destroy(YourPromise&) { ... }
void await_suspend(auto handle) {
await_suspend_destroy(handle.promise());
handle.destroy();
}

This enables ``await_suspend_destroy()`` usage in portable awaiters — just add a
stub ``await_suspend()`` as above. Without ``coro_await_suspend_destroy``
support, the awaiter will behave nearly identically, with the only difference
being heap allocation instead of stack allocation for the coroutine frame.

This attribute helps optimize short-circuiting coroutines.

A short-circuiting coroutine is one where every ``co_await`` or ``co_yield``
either immediately produces a value, or exits the coroutine. In other words,
they use coroutine syntax to concisely branch out of a synchronous function.
Here are close analogs in other languages:

- Rust has ``Result<T>`` and a ``?`` operator to unpack it, while
``folly::result<T>`` is a C++ short-circuiting coroutine, with ``co_await``
acting just like ``?``.

- Haskell has ``Maybe`` & ``Error`` monads. A short-circuiting ``co_await``
loosely corresponds to the monadic ``>>=``, whereas a short-circuiting
``std::optional`` coro would be an exact analog of ``Maybe``.

The C++ implementation relies on short-circuiting awaiters. These either
resume synchronously, or immediately destroy the awaiting coroutine and return
control to the parent:

.. code-block:: c++

T val;
if (awaiter.await_ready()) {
val = awaiter.await_resume();
} else {
awaiter.await_suspend();
return /* value representing the "execution short-circuited" outcome */;
}

Then, a short-circuiting coroutine is one where all the suspend points are
either (i) trivial (like ``std::suspend_never``), or (ii) short-circuiting.

Although the coroutine machinery makes them harder to optimize, logically,
short-circuiting coroutines are like syntax sugar for regular functions where:

- ``co_await`` allows expressions to return early.

- ``unhandled_exception()`` lets the coroutine promise type wrap the function
body in an implicit try-catch. This mandatory exception boundary behavior
can be desirable in robust, return-value-oriented programs that benefit from
short-circuiting coroutines. If not, the promise can always re-throw.

This attribute improves short-circuiting coroutines in a few ways:

- **Avoid heap allocations for coro frames**: Allocating short-circuiting
coros on the stack makes code more predictable under memory pressure.
Without this attribute, LLVM cannot elide heap allocation even when all
awaiters are short-circuiting.

- **Performance**: Significantly faster execution and smaller code size.

- **Build time**: Faster compilation due to less IR being generated.

Marking your ``await_suspend_destroy`` method as ``noexcept`` can sometimes
further improve optimization.

Here is a toy example of a portable short-circuiting awaiter:

.. code-block:: c++

template <typename T>
struct [[clang::coro_await_suspend_destroy]] optional_awaiter {
std::optional<T> opt_;
bool await_ready() const noexcept { return opt_.has_value(); }
T await_resume() { return std::move(opt_).value(); }
void await_suspend_destroy(auto& promise) {
// Assume the return object of the outer coro defaults to "empty".
}
// Fallback for when `coro_await_suspend_destroy` is unavailable.
void await_suspend(auto handle) {
await_suspend_destroy(handle.promise());
handle.destroy();
}
};

}];
}

def CountedByDocs : Documentation {
let Category = DocCatField;
let Content = [{
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -12504,6 +12504,9 @@ def note_coroutine_promise_call_implicitly_required : Note<
def err_await_suspend_invalid_return_type : Error<
"return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
>;
// Error for an `await_suspend_destroy` whose return type is not `void`; the
// `%0` placeholder in the message carries the type that was found instead.
def err_await_suspend_destroy_invalid_return_type : Error<
"return type of 'await_suspend_destroy' is required to be 'void' (have %0)"
>;
def note_await_ready_no_bool_conversion : Note<
"return type of 'await_ready' is required to be contextually convertible to 'bool'"
>;
Expand Down
186 changes: 146 additions & 40 deletions clang/lib/CodeGen/CGCoroutine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,66 @@ static bool StmtCanThrow(const Stmt *S) {
return false;
}

// Check if this suspend should be calling `await_suspend_destroy`
static bool useCoroAwaitSuspendDestroy(const CoroutineSuspendExpr &S) {
// This can only be an `await_suspend_destroy` suspend expression if it
// returns void -- `buildCoawaitCalls` in `SemaCoroutine.cpp` asserts this.
// Moreover, when `await_suspend` returns a handle, the outermost method call
// is `.address()` -- making it harder to get the actual class or method.
if (S.getSuspendReturnType() !=
CoroutineSuspendExpr::SuspendReturnType::SuspendVoid) {
return false;
}

// `CGCoroutine.cpp` & `SemaCoroutine.cpp` must agree on whether this suspend
// expression uses `[[clang::coro_await_suspend_destroy]]`.
//
// Any mismatch is a serious bug -- we would either double-free, or fail to
// destroy the promise type. For this reason, we make our decision based on
// the method name, and fatal outside of the happy path -- including on
// failure to find a method name.
//
// As a debug-only check we also try to detect the `AwaiterClass`. This is
// secondary, because detection of the awaiter type can be silently broken by
// small `buildCoawaitCalls` AST changes.
StringRef SuspendMethodName; // Primary
CXXRecordDecl *AwaiterClass = nullptr; // Debug-only, best-effort
if (auto *SuspendCall =
dyn_cast<CallExpr>(S.getSuspendExpr()->IgnoreImplicit())) {
if (auto *SuspendMember = dyn_cast<MemberExpr>(SuspendCall->getCallee())) {
if (auto *BaseExpr = SuspendMember->getBase()) {
// `IgnoreImplicitAsWritten` is critical since `await_suspend...` can be
// invoked on the base of the actual awaiter, and the base need not have
// the attribute. In such cases, the AST will show the true awaiter
// being upcast to the base.
AwaiterClass = BaseExpr->IgnoreImplicitAsWritten()
->getType()
->getAsCXXRecordDecl();
}
if (auto *SuspendMethod =
dyn_cast<CXXMethodDecl>(SuspendMember->getMemberDecl())) {
SuspendMethodName = SuspendMethod->getName();
}
}
}
if (SuspendMethodName == "await_suspend_destroy") {
assert(!AwaiterClass ||
AwaiterClass->hasAttr<CoroAwaitSuspendDestroyAttr>());
return true;
} else if (SuspendMethodName == "await_suspend") {
assert(!AwaiterClass ||
!AwaiterClass->hasAttr<CoroAwaitSuspendDestroyAttr>());
return false;
} else {
llvm::report_fatal_error(
"Wrong method in [[clang::coro_await_suspend_destroy]] check: "
"expected 'await_suspend' or 'await_suspend_destroy', but got '" +
SuspendMethodName + "'");
}

return false;
}

// Emit suspend expression which roughly looks like:
//
// auto && x = CommonExpr();
Expand Down Expand Up @@ -220,51 +280,54 @@ namespace {
RValue RV;
};
}
static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
CoroutineSuspendExpr const &S,
AwaitKind Kind, AggValueSlot aggSlot,
bool ignoreResult, bool forLValue) {
auto *E = S.getCommonExpr();

auto CommonBinder =
CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
auto UnbindCommonOnExit =
llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });

auto Prefix = buildSuspendPrefixStr(Coro, Kind);
BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));

// If expression is ready, no need to suspend.
CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
// The simplified `await_suspend_destroy` path avoids suspend intrinsics.
//
// If a coro has only `await_suspend_destroy` and trivial (`suspend_never`)
// awaiters, then subsequent passes are able to allocate its frame on-stack.
//
// As of 2025, there is still an optimization gap between a realistic
// short-circuiting coro, and the equivalent plain function. For a
// guesstimate, expect 4-5ns per call on x86. One idea for improvement is to
// also elide trivial suspends like `std::suspend_never`, in order to hit the
// `HasCoroSuspend` path in `CoroEarly.cpp`.
static void emitAwaitSuspendDestroy(CodeGenFunction &CGF, CGCoroData &Coro,
llvm::Function *SuspendWrapper,
llvm::Value *Awaiter, llvm::Value *Frame,
bool AwaitSuspendCanThrow) {
SmallVector<llvm::Value *, 2> DirectCallArgs;
DirectCallArgs.push_back(Awaiter);
DirectCallArgs.push_back(Frame);

if (AwaitSuspendCanThrow) {
CGF.EmitCallOrInvoke(SuspendWrapper, DirectCallArgs);
} else {
CGF.EmitNounwindRuntimeCall(SuspendWrapper, DirectCallArgs);
}

// Otherwise, emit suspend logic.
CGF.EmitBlock(SuspendBlock);
CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
}

static void emitStandardAwaitSuspend(
CodeGenFunction &CGF, CGCoroData &Coro, CoroutineSuspendExpr const &S,
llvm::Function *SuspendWrapper, llvm::Value *Awaiter, llvm::Value *Frame,
bool AwaitSuspendCanThrow, SmallString<32> Prefix, BasicBlock *ReadyBlock,
AwaitKind Kind, CoroutineSuspendExpr::SuspendReturnType SuspendReturnType) {
auto &Builder = CGF.Builder;
llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});

auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
CGF.CurFn->getName(), Prefix, S);

CGF.CurCoro.InSuspendBlock = true;

assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
"expected to be called in coroutine context");

SmallVector<llvm::Value *, 3> SuspendIntrinsicCallArgs;
SuspendIntrinsicCallArgs.push_back(
CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF));

SuspendIntrinsicCallArgs.push_back(CGF.CurCoro.Data->CoroBegin);
SuspendIntrinsicCallArgs.push_back(Awaiter);
SuspendIntrinsicCallArgs.push_back(Frame);
SuspendIntrinsicCallArgs.push_back(SuspendWrapper);
BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));

const auto SuspendReturnType = S.getSuspendReturnType();
llvm::Intrinsic::ID AwaitSuspendIID;
llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});

llvm::Intrinsic::ID AwaitSuspendIID;
switch (SuspendReturnType) {
case CoroutineSuspendExpr::SuspendReturnType::SuspendVoid:
AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_void;
Expand All @@ -279,12 +342,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co

llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);

// SuspendHandle might throw since it also resumes the returned handle.
const bool AwaitSuspendCanThrow =
SuspendReturnType ==
CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
StmtCanThrow(S.getSuspendExpr());

llvm::CallBase *SuspendRet = nullptr;
// FIXME: add call attributes?
if (AwaitSuspendCanThrow)
Expand Down Expand Up @@ -332,6 +389,54 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
// Emit cleanup for this suspend point.
CGF.EmitBlock(CleanupBlock);
CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
}

static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
CoroutineSuspendExpr const &S,
AwaitKind Kind, AggValueSlot aggSlot,
bool ignoreResult, bool forLValue) {
auto *E = S.getCommonExpr();

auto CommonBinder =
CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
auto UnbindCommonOnExit =
llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });

auto Prefix = buildSuspendPrefixStr(Coro, Kind);
BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));

// If expression is ready, no need to suspend.
CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);

// Otherwise, emit suspend logic.
CGF.EmitBlock(SuspendBlock);

auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
CGF.CurFn->getName(), Prefix, S);

assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
"expected to be called in coroutine context");

// SuspendHandle might throw since it also resumes the returned handle.
const auto SuspendReturnType = S.getSuspendReturnType();
const bool AwaitSuspendCanThrow =
SuspendReturnType ==
CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
StmtCanThrow(S.getSuspendExpr());

llvm::Value *Awaiter =
CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF);
llvm::Value *Frame = CGF.CurCoro.Data->CoroBegin;

if (useCoroAwaitSuspendDestroy(S)) { // Call `await_suspend_destroy` & cleanup
emitAwaitSuspendDestroy(CGF, Coro, SuspendWrapper, Awaiter, Frame,
AwaitSuspendCanThrow);
} else { // Normal suspend path -- can actually suspend, uses intrinsics
emitStandardAwaitSuspend(CGF, Coro, S, SuspendWrapper, Awaiter, Frame,
AwaitSuspendCanThrow, Prefix, ReadyBlock, Kind,
SuspendReturnType);
}

// Emit await_resume expression.
CGF.EmitBlock(ReadyBlock);
Expand All @@ -341,6 +446,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
CXXTryStmt *TryStmt = nullptr;
if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
StmtCanThrow(S.getResumeExpr())) {
auto &Builder = CGF.Builder;
Coro.ResumeEHVar =
CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
Builder.CreateFlagStore(true, Coro.ResumeEHVar);
Expand Down
Loading
Loading