Skip to content

Commit 78c958d

Browse files
committed
Elide suspension points via [[clang::coro_destroyed_on_suspend]]
See `AttrDocs.td` for the user-facing docs. The immediate motivation was that I noticed that short-circuiting coroutines failed to optimize well. My initial demo program is here: https://godbolt.org/z/oMdq8zfov ``` #include <coroutine> #include <optional> // `optional_wrapper` exists since `get_return_object()` can't return // `std::optional` directly. C++ coroutines have a fundamental timing mismatch // between when the return object is created and when the value is available: // // 1) Early (coroutine startup): `get_return_object()` is called and must return // something immediately. // 2) Later (when `co_return` executes): `return_value(T)` is called with the // actual value. // 3) Issue: If `get_return_object()` returns the storage, it's empty when // returned, and writing to it later cannot affect the already-returned copy. template <typename T> struct optional_wrapper { std::optional<T> storage_; std::optional<T>*& pointer_; optional_wrapper(std::optional<T>*& p) : pointer_(p) { pointer_ = &storage_; } operator std::optional<T>() { return std::move(*storage_); } ~optional_wrapper() {} }; // Make `std::optional` a coroutine namespace std { template <typename T, typename... Args> struct coroutine_traits<optional<T>, Args...> { struct promise_type { optional<T>* storagePtr_ = nullptr; promise_type() = default; ::optional_wrapper<T> get_return_object() { return ::optional_wrapper<T>(storagePtr_); } suspend_never initial_suspend() const noexcept { return {}; } suspend_never final_suspend() const noexcept { return {}; } void return_value(T value) { *storagePtr_ = std::move(value); } void unhandled_exception() { // Leave storage_ empty to represent error } }; }; } // namespace std // Non-coroutine baseline -- compare to the `simple_coro` variants below std::optional<int> simple_func(const std::optional<int>& r) { if (r.has_value()) { return r.value() + 1; } return std::nullopt; // empty optional (error) } // Without `co_await`, this optimizes just like `simple_func`. // Unfortunately, this doesn't short-circuit when `r` is empty. std::optional<int> wrong_simple_coro(const std::optional<int>& r) { co_return r.value() + 2; } // `#if 0` adds a short-circuiting `co_await` that may suspend to // immediately exit the parent coroutine. Unfortunately, this // triggers a coro frame allocation, and pessimizes `simple_coro`. template <typename T> struct optional_awaitable { std::optional<T> opt_; bool await_ready() const noexcept { return opt_.has_value(); } T await_resume() { return opt_.value(); } template <typename Promise> [[clang::coro_destroyed_on_suspend]] void await_suspend( std::coroutine_handle<Promise> handle) { handle.destroy(); } }; template <typename T> optional_awaitable<T> operator co_await(std::optional<T> opt) { return {std::move(opt)}; } // Matches the logic of `simple_func`. Before this patch, adding a // "suspend-to-exit" await point forces an allocation. std::optional<int> simple_coro(const std::optional<int>& r) { co_return co_await std::move(r) + 4; } int main() { return simple_coro(std::optional<int>{8}).value() + wrong_simple_coro(std::optional<int>{16}).value() + simple_func(std::optional<int>{32}).value(); } ```
1 parent 1e815ce commit 78c958d

File tree

7 files changed

+274
-17
lines changed

7 files changed

+274
-17
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,10 @@ Removed Compiler Flags
116116
Attribute Changes in Clang
117117
--------------------------
118118

119+
- Introduced a new attribute ``[[clang::coro_destroyed_on_suspend]]`` on coroutine ``await_suspend``
120+
methods to indicate that the coroutine will be destroyed during suspension, enabling optimizations
121+
that skip the suspend point.
122+
119123
Improvements to Clang's diagnostics
120124
-----------------------------------
121125
- Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic

clang/include/clang/Basic/Attr.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
13521352
let SimpleHandler = 1;
13531353
}
13541354

1355+
def CoroDestroyedOnSuspend: InheritableAttr {
1356+
let Spellings = [Clang<"coro_destroyed_on_suspend">];
1357+
let Subjects = SubjectList<[CXXMethod]>;
1358+
let LangOpts = [CPlusPlus];
1359+
let Documentation = [CoroDestroyedOnSuspendDoc];
1360+
let SimpleHandler = 1;
1361+
}
1362+
13551363
// OSObject-based attributes.
13561364
def OSConsumed : InheritableParamAttr {
13571365
let Spellings = [Clang<"os_consumed">];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9244,6 +9244,66 @@ Example:
92449244
}];
92459245
}
92469246

9247+
def CoroDestroyedOnSuspendDoc : Documentation {
9248+
let Category = DocCatDecl;
9249+
let Content = [{
9250+
The ``[[clang::coro_destroyed_on_suspend]]`` attribute may be applied to a C++
9251+
``void await_suspend(handle)`` method belonging to a coroutine awaiter object.
9252+
9253+
**Warning:** The attribute must only be used when ``await_suspend`` is
9254+
guaranteed to call ``handle.destroy()`` before completing. Violating this
9255+
invariant **will** cause undefined behavior. Be sure to consider exceptions.
9256+
9257+
This attribute improves the optimization of "short-circuiting" coroutines. It
9258+
is useful when every suspend point in the coroutine is either (i) trivial
9259+
(`std::suspend_never`), or (ii) a `co_await` that can be translated into simple
9260+
control flow as:
9261+
9262+
.. code-block:: c++
9263+
9264+
if (awaiter.await_ready()) {
9265+
val = awaiter.await_resume();
9266+
} else {
9267+
awaiter.await_suspend();
9268+
return /* value representing the "execution short-circuited" outcome */
9269+
}
9270+
9271+
Adding the attribute permits the compiler to elide the `@llvm.coro.suspend`
9272+
intrinsic and go directly to the cleanup/destruction path, avoiding unnecessary
9273+
suspend/resume overhead. If all suspension points can be simplified via (i) or
9274+
(ii), then the coroutine should be able to be optimized nearly like a plain
9275+
function via the `isNoSuspendCoroutine` optimization. The typical wins are:
9276+
- no heap allocation
9277+
- smaller code size & better optimization
9278+
9279+
For example,
9280+
9281+
.. code-block:: c++
9282+
9283+
template <typename T>
9284+
struct optional_awaitable {
9285+
std::optional<T> opt_;
9286+
9287+
bool await_ready() const noexcept {
9288+
return opt_.has_value();
9289+
}
9290+
9291+
T await_resume() {
9292+
return std::move(opt_).value();
9293+
}
9294+
9295+
[[clang::coro_destroyed_on_suspend]]
9296+
template <typename Promise>
9297+
void await_suspend(std::coroutine_handle<Promise> handle) {
9298+
// DANGER: If any exit path from a `coro_destroyed_on_suspend`-marked
9299+
// `await_suspend` neglects to call `destroy()`, that will cause UB.
9300+
handle.destroy();
9301+
}
9302+
};
9303+
9304+
}];
9305+
}
9306+
92479307
def CountedByDocs : Documentation {
92489308
let Category = DocCatField;
92499309
let Content = [{

clang/lib/CodeGen/CGCoroutine.cpp

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -247,8 +247,9 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
247247
auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
248248
auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
249249

250+
bool SuspendWillDestroyCoro = false;
250251
auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
251-
CGF.CurFn->getName(), Prefix, S);
252+
CGF.CurFn->getName(), Prefix, S, SuspendWillDestroyCoro);
252253

253254
CGF.CurCoro.InSuspendBlock = true;
254255

@@ -317,17 +318,24 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
317318
}
318319
}
319320

320-
// Emit the suspend point.
321-
const bool IsFinalSuspend = (Kind == AwaitKind::Final);
322-
llvm::Function *CoroSuspend =
323-
CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
324-
auto *SuspendResult = Builder.CreateCall(
325-
CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
326-
327-
// Create a switch capturing three possible continuations.
328-
auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
329-
Switch->addCase(Builder.getInt8(0), ReadyBlock);
330-
Switch->addCase(Builder.getInt8(1), CleanupBlock);
321+
// If suspend will destroy coroutine, skip the suspend intrinsic and go
322+
// straight to cleanup
323+
if (SuspendWillDestroyCoro) {
324+
// Skip the suspend point and go directly to cleanup
325+
CGF.EmitBranch(CleanupBlock);
326+
} else {
327+
// Emit the suspend point.
328+
const bool IsFinalSuspend = (Kind == AwaitKind::Final);
329+
llvm::Function *CoroSuspend =
330+
CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend);
331+
auto *SuspendResult = Builder.CreateCall(
332+
CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)});
333+
334+
// Create a switch capturing three possible continuations.
335+
auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2);
336+
Switch->addCase(Builder.getInt8(0), ReadyBlock);
337+
Switch->addCase(Builder.getInt8(1), CleanupBlock);
338+
}
331339

332340
// Emit cleanup for this suspend point.
333341
CGF.EmitBlock(CleanupBlock);
@@ -411,10 +419,9 @@ static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx,
411419
}
412420
#endif
413421

414-
llvm::Function *
415-
CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
416-
Twine const &SuspendPointName,
417-
CoroutineSuspendExpr const &S) {
422+
llvm::Function *CodeGenFunction::generateAwaitSuspendWrapper(
423+
Twine const &CoroName, Twine const &SuspendPointName,
424+
CoroutineSuspendExpr const &S, bool &SuspendWillDestroyCoro) {
418425
std::string FuncName =
419426
(CoroName + ".__await_suspend_wrapper__" + SuspendPointName).str();
420427

@@ -445,6 +452,30 @@ CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName,
445452
Fn->setMustProgress();
446453
Fn->addFnAttr(llvm::Attribute::AttrKind::AlwaysInline);
447454

455+
// Is the underlying `await_suspend()` marked `coro_destroyed_on_suspend`?
456+
SuspendWillDestroyCoro = false;
457+
// Only check for the attribute if the suspend expression returns void. When
458+
// a handle is returned, the outermost method call is `.address()`, and
459+
// there's no point in doing more work to obtain the inner `await_suspend`.
460+
if (S.getSuspendReturnType() == CoroutineSuspendExpr::SuspendReturnType::SuspendVoid) {
461+
if (auto *CE = dyn_cast<CallExpr>(S.getSuspendExpr()->IgnoreImplicit())) {
462+
if (auto *ME = dyn_cast<MemberExpr>(CE->getCallee())) {
463+
if (auto *MethodDecl = dyn_cast<CXXMethodDecl>(ME->getMemberDecl())) {
464+
if (MethodDecl->getNameAsString() == "await_suspend") {
465+
for (const auto *Attr : MethodDecl->attrs()) {
466+
if (Attr->getKind() == attr::Kind::CoroDestroyedOnSuspend) {
467+
SuspendWillDestroyCoro = true;
468+
break;
469+
}
470+
}
471+
} else {
472+
assert(false && "Unexpected method for [[clang::coro_destroyed_on_suspend]]");
473+
}
474+
}
475+
}
476+
}
477+
}
478+
448479
StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args);
449480

450481
// FIXME: add TBAA metadata to the loads

clang/lib/CodeGen/CodeGenFunction.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,8 @@ class CodeGenFunction : public CodeGenTypeCache {
383383
// where type is one of (void, i1, ptr)
384384
llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName,
385385
Twine const &SuspendPointName,
386-
CoroutineSuspendExpr const &S);
386+
CoroutineSuspendExpr const &S,
387+
bool &SuspendWillDestroyCoro);
387388

388389
/// CurGD - The GlobalDecl for the current function being compiled.
389390
GlobalDecl CurGD;
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
2+
// RUN: -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-INITIAL
3+
// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \
4+
// RUN: -O2 | FileCheck %s --check-prefix=CHECK-OPTIMIZED
5+
6+
#include "Inputs/coroutine.h"
7+
8+
// Awaitable with `coro_destroyed_on_suspend` attribute
9+
struct DestroyingAwaitable {
10+
bool await_ready() { return false; }
11+
[[clang::coro_destroyed_on_suspend]]
12+
void await_suspend(std::coroutine_handle<> h) {
13+
h.destroy();
14+
}
15+
void await_resume() {}
16+
};
17+
18+
// Awaitable without `coro_destroyed_on_suspend` (normal behavior)
19+
struct NormalAwaitable {
20+
bool await_ready() { return false; }
21+
void await_suspend(std::coroutine_handle<> h) {}
22+
void await_resume() {}
23+
};
24+
25+
// Coroutine type with `std::suspend_never` for initial/final suspend
26+
struct Task {
27+
struct promise_type {
28+
Task get_return_object() { return {}; }
29+
std::suspend_never initial_suspend() { return {}; }
30+
std::suspend_never final_suspend() noexcept { return {}; }
31+
void return_void() {}
32+
void unhandled_exception() {}
33+
};
34+
};
35+
36+
// Single co_await with coro_destroyed_on_suspend.
37+
// Should result in no allocation after optimization.
38+
Task test_single_destroying_await() {
39+
co_await DestroyingAwaitable{};
40+
}
41+
42+
// CHECK-INITIAL-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
43+
// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
44+
// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
45+
46+
// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv
47+
// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
48+
// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
49+
// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
50+
51+
// Test multiple `co_await`s, all with `coro_destroyed_on_suspend`.
52+
// This should also result in no allocation after optimization.
53+
Task test_multiple_destroying_awaits(bool condition) {
54+
co_await DestroyingAwaitable{};
55+
co_await DestroyingAwaitable{};
56+
if (condition) {
57+
co_await DestroyingAwaitable{};
58+
}
59+
}
60+
61+
// CHECK-INITIAL-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
62+
// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
63+
// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
64+
65+
// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb
66+
// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
67+
// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
68+
// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
69+
70+
// Mixed awaits - some with `coro_destroyed_on_suspend`, some without.
71+
// We should still see allocation because not all awaits destroy the coroutine.
72+
Task test_mixed_awaits() {
73+
co_await NormalAwaitable{}; // Must precede "destroy" to be reachable
74+
co_await DestroyingAwaitable{};
75+
}
76+
77+
// CHECK-INITIAL-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
78+
// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc
79+
// CHECK-INITIAL: call{{.*}} @llvm.coro.begin
80+
81+
// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv
82+
// CHECK-OPTIMIZED: call{{.*}} @_Znwm
83+
84+
85+
// Check the attribute detection affects control flow. With
86+
// `coro_destroyed_on_suspend`, the suspend point should be skipped and control
87+
// should go directly to cleanup instead of the normal suspend logic
88+
Task test_attribute_detection() {
89+
co_await DestroyingAwaitable{};
90+
// Unreachable in OPTIMIZED, so those builds don't see an allocation.
91+
co_await NormalAwaitable{};
92+
}
93+
94+
// Check that we skip the normal suspend intrinsic and go directly to cleanup.
95+
// This is the key behavioral difference the attribute enables.
96+
//
97+
// CHECK-INITIAL-LABEL: define{{.*}} void @_Z24test_attribute_detectionv
98+
// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await
99+
// CHECK-NEXT: br label %await.cleanup
100+
// CHECK-INITIAL-NOT: call{{.*}} @llvm.coro.suspend
101+
// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await
102+
// CHECK-INITIAL: call{{.*}} @llvm.coro.suspend
103+
// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__final
104+
105+
// Since `co_await DestroyingAwaitable{}` gets converted into an unconditional
106+
// branch, the `co_await NormalAwaitable{}` is unreachable in optimized builds.
107+
//
108+
// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
109+
// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
110+
// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
111+
112+
// Template awaitable with `coro_destroyed_on_suspend` attribute
113+
template<typename T>
114+
struct TemplateDestroyingAwaitable {
115+
bool await_ready() { return false; }
116+
117+
[[clang::coro_destroyed_on_suspend]]
118+
void await_suspend(std::coroutine_handle<> h) {
119+
h.destroy();
120+
}
121+
122+
void await_resume() {}
123+
};
124+
125+
Task test_template_destroying_await() {
126+
co_await TemplateDestroyingAwaitable<int>{};
127+
}
128+
129+
// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z30test_template_destroying_awaitv
130+
// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc
131+
// CHECK-OPTIMIZED-NOT: call{{.*}} malloc
132+
// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm
133+
134+
struct InvalidDestroyingAwaitable {
135+
bool await_ready() { return false; }
136+
137+
// The attribute is ignored, since the function doesn't return `void`!
138+
[[clang::coro_destroyed_on_suspend]]
139+
bool await_suspend(std::coroutine_handle<> h) {
140+
h.destroy();
141+
return true;
142+
}
143+
144+
void await_resume() {}
145+
};
146+
147+
Task test_invalid_destroying_await() {
148+
co_await InvalidDestroyingAwaitable{};
149+
}
150+
151+
// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z29test_invalid_destroying_awaitv
152+
// CHECK-OPTIMIZED: call{{.*}} @_Znwm

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
// CHECK-NEXT: Convergent (SubjectMatchRule_function)
6363
// CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record)
6464
// CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter)
65+
// CHECK-NEXT: CoroDestroyedOnSuspend (SubjectMatchRule_function_is_member)
6566
// CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function)
6667
// CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record)
6768
// CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)

0 commit comments

Comments
 (0)