llvm · snarkmaster · Aug 7, 2025 · Aug 8, 2025 · Aug 8, 2025 · Aug 8, 2025
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
@@ -136,6 +136,12 @@ Removed Compiler Flags
 Attribute Changes in Clang
 --------------------------
 
+- Introduced a new attribute ``[[clang::coro_await_suspend_destroy]]``.  When
+  applied to a coroutine awaiter class, it causes suspensions into this awaiter
+  to use a new ``await_suspend_destroy(Promise&)`` method instead of the
+  standard ``await_suspend(std::coroutine_handle<...>)``.  The coroutine is then
+  destroyed.  This improves code speed & size for "short-circuiting" coroutines.
+
 Improvements to Clang's diagnostics
 -----------------------------------
 - Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
@@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr {
   let SimpleHandler = 1;
 }
 
+def CoroAwaitSuspendDestroy : InheritableAttr {
+  let Spellings = [Clang<"coro_await_suspend_destroy">];
+  let Subjects = SubjectList<[CXXRecord]>; // The coroutine awaiter class.
+  let LangOpts = [CPlusPlus];
+  let Documentation = [CoroAwaitSuspendDestroyDoc];
+  let SimpleHandler = 1;
+}
+
 // OSObject-based attributes.
 def OSConsumed : InheritableParamAttr {
   let Spellings = [Clang<"os_consumed">];

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
@@ -9270,6 +9270,110 @@ Example:
 }];
 }
 
+def CoroAwaitSuspendDestroyDoc : Documentation {
+  let Category = DocCatDecl;
+  let Content = [{
+
+The ``[[clang::coro_await_suspend_destroy]]`` attribute may be applied to a C++
+coroutine awaiter type.  When this attribute is present, the awaiter must
+implement ``void await_suspend_destroy(Promise&)``.  If ``await_ready()``
+returns ``false`` at a suspension point, ``await_suspend_destroy`` will be
+called directly.  The coroutine being suspended will then be immediately
+destroyed.
+
+The new behavior is equivalent to this standard code:
+
+.. code-block:: c++
+
+  void await_suspend_destroy(YourPromise&) { ... }
+  void await_suspend(auto handle) {
+    await_suspend_destroy(handle.promise());
+    handle.destroy();
+  }
+
+This enables ``await_suspend_destroy()`` usage in portable awaiters — just add
+a stub ``await_suspend()`` as above.  Without ``coro_await_suspend_destroy``
+support, the awaiter will behave nearly identically, with the only difference
+being heap allocation instead of stack allocation for the coroutine frame.
+
+This attribute helps optimize short-circuiting coroutines.
+
+A short-circuiting coroutine is one where every ``co_await`` or ``co_yield``
+either immediately produces a value, or exits the coroutine.  In other words,
+they use coroutine syntax to concisely branch out of a synchronous function.
+Here are close analogs in other languages:
+
+- Rust has ``Result<T>`` and a ``?`` operator to unpack it, while
+  ``folly::result<T>`` is a C++ short-circuiting coroutine, with ``co_await``
+  acting just like ``?``.
+
+- Haskell has ``Maybe`` & ``Error`` monads.  A short-circuiting ``co_await``
+  loosely corresponds to the monadic ``>>=``, whereas a short-circuiting
+  ``std::optional`` coro would be an exact analog of ``Maybe``.
+
+The C++ implementation relies on short-circuiting awaiters.  These either
+resume synchronously, or immediately destroy the awaiting coroutine and return
+control to the parent:
+
+.. code-block:: c++
+
+  T val;
+  if (awaiter.await_ready()) {
+    val = awaiter.await_resume();
+  } else {
+    awaiter.await_suspend();
+    return /* value representing the "execution short-circuited" outcome */;
+  }
+
+Then, a short-circuiting coroutine is one where all the suspend points are
+either (i) trivial (like ``std::suspend_never``), or (ii) short-circuiting.
+
+Although the coroutine machinery makes them harder to optimize, logically,
+short-circuiting coroutines are like syntax sugar for regular functions where:
+
+- ``co_await`` allows expressions to return early.
+
+- ``unhandled_exception()`` lets the coroutine promise type wrap the function
+  body in an implicit try-catch.  This mandatory exception boundary behavior
+  can be desirable in robust, return-value-oriented programs that benefit from
+  short-circuiting coroutines.  If not, the promise can always re-throw.
+
+This attribute improves short-circuiting coroutines in a few ways:
+
+- **Avoid heap allocations for coro frames**: Allocating short-circuiting
+  coros on the stack makes code more predictable under memory pressure.
+  Without this attribute, LLVM cannot elide heap allocation even when all
+  awaiters are short-circuiting.
+
+- **Performance**: Significantly faster execution and smaller code size.
+
+- **Build time**: Faster compilation due to less IR being generated.
+
+Marking your ``await_suspend_destroy`` method as ``noexcept`` can sometimes
+further improve optimization.
+
+Here is a toy example of a portable short-circuiting awaiter:
+
+.. code-block:: c++
+
+  template <typename T>
+  struct [[clang::coro_await_suspend_destroy]] optional_awaiter {
+    std::optional<T> opt_;
+    bool await_ready() const noexcept { return opt_.has_value(); }
+    T await_resume() { return std::move(opt_).value(); }
+    void await_suspend_destroy(auto& promise) {
+      // Assume the return object of the outer coro defaults to "empty".
+    }
+    // Fallback for when `coro_await_suspend_destroy` is unavailable.
+    void await_suspend(auto handle) {
+      await_suspend_destroy(handle.promise());
+      handle.destroy();
+    }
+  };
+
+}];
+}
+
 def CountedByDocs : Documentation {
   let Category = DocCatField;
   let Content = [{

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12504,6 +12504,9 @@ def note_coroutine_promise_call_implicitly_required : Note<
 def err_await_suspend_invalid_return_type : Error<
   "return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)"
 >;
+def err_await_suspend_destroy_invalid_return_type : Error<
+  "return type of 'await_suspend_destroy' is required to be 'void' (have %0)"
+>; // %0 is printed as the return type that was found instead of 'void'.
 def note_await_ready_no_bool_conversion : Note<
   "return type of 'await_ready' is required to be contextually convertible to 'bool'"
 >;

diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -174,6 +174,66 @@ static bool StmtCanThrow(const Stmt *S) {
   return false;
 }
 
+// Decide whether this suspend point calls `await_suspend_destroy`. Returns
+// false for non-void suspends and for `await_suspend`, true for
+// `await_suspend_destroy`; any other (or missing) method name is fatal.
+static bool useCoroAwaitSuspendDestroy(const CoroutineSuspendExpr &S) {
+  // This can only be an `await_suspend_destroy` suspend expression if it
+  // returns void -- `buildCoawaitCalls` in `SemaCoroutine.cpp` asserts this.
+  // Moreover, when `await_suspend` returns a handle, the outermost method call
+  // is `.address()` -- making it harder to get the actual class or method.
+  if (S.getSuspendReturnType() !=
+      CoroutineSuspendExpr::SuspendReturnType::SuspendVoid)
+    return false;
+
+  // `CGCoroutine.cpp` & `SemaCoroutine.cpp` must agree on whether this suspend
+  // expression uses `[[clang::coro_await_suspend_destroy]]`.
+  //
+  // Any mismatch is a serious bug -- we would either double-free, or fail to
+  // destroy the promise type. For this reason, we make our decision based on
+  // the method name, and fatal outside of the happy path -- including on
+  // failure to find a method name.
+  //
+  // As a debug-only check we also try to detect the `AwaiterClass`. This is
+  // secondary, because detection of the awaiter type can be silently broken by
+  // small `buildCoawaitCalls` AST changes.
+  StringRef SuspendMethodName; // Primary
+  // Debug-only, best-effort. Only `assert` reads it, hence `[[maybe_unused]]`.
+  [[maybe_unused]] CXXRecordDecl *AwaiterClass = nullptr;
+  if (auto *SuspendCall =
+          dyn_cast<CallExpr>(S.getSuspendExpr()->IgnoreImplicit())) {
+    if (auto *SuspendMember = dyn_cast<MemberExpr>(SuspendCall->getCallee())) {
+      if (auto *BaseExpr = SuspendMember->getBase()) {
+        // `IgnoreImplicitAsWritten` is critical since `await_suspend...` can be
+        // invoked on the base of the actual awaiter, and the base need not have
+        // the attribute. In such cases, the AST will show the true awaiter
+        // being upcast to the base.
+        AwaiterClass = BaseExpr->IgnoreImplicitAsWritten()
+                           ->getType()
+                           ->getAsCXXRecordDecl();
+      }
+      if (auto *SuspendMethod =
+              dyn_cast<CXXMethodDecl>(SuspendMember->getMemberDecl()))
+        SuspendMethodName = SuspendMethod->getName();
+    }
+  }
+  if (SuspendMethodName == "await_suspend_destroy") {
+    assert(!AwaiterClass ||
+           AwaiterClass->hasAttr<CoroAwaitSuspendDestroyAttr>());
+    return true;
+  }
+  if (SuspendMethodName == "await_suspend") {
+    assert(!AwaiterClass ||
+           !AwaiterClass->hasAttr<CoroAwaitSuspendDestroyAttr>());
+    return false;
+  }
+  // `report_fatal_error` does not return, so no trailing `return` is needed.
+  llvm::report_fatal_error(
+      "Wrong method in [[clang::coro_await_suspend_destroy]] check: "
+      "expected 'await_suspend' or 'await_suspend_destroy', but got '" +
+      SuspendMethodName + "'");
+}
+
 // Emit suspend expression which roughly looks like:
 //
 //   auto && x = CommonExpr();
@@ -220,51 +280,54 @@ namespace {
     RValue RV;
   };
 }
-static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
-                                    CoroutineSuspendExpr const &S,
-                                    AwaitKind Kind, AggValueSlot aggSlot,
-                                    bool ignoreResult, bool forLValue) {
-  auto *E = S.getCommonExpr();
-
-  auto CommonBinder =
-      CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
-  auto UnbindCommonOnExit =
-      llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });
-
-  auto Prefix = buildSuspendPrefixStr(Coro, Kind);
-  BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
-  BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
-  BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
 
-  // If expression is ready, no need to suspend.
-  CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+// Emit the `await_suspend_destroy` path: call the suspend wrapper directly,
+// then branch to the coroutine's cleanup. No suspend intrinsics are emitted.
+//
+// When every awaiter in a coro is either `await_suspend_destroy` or trivial
+// (`suspend_never`), later passes are able to allocate its frame on-stack.
+//
+// As of 2025, a realistic short-circuiting coro is still slower than the
+// equivalent plain function -- as a guesstimate, by 4-5ns per call on x86.
+// One idea for improvement is to also elide trivial suspends like
+// `std::suspend_never`, in order to hit the `HasCoroSuspend` path in
+// `CoroEarly.cpp`.
+static void emitAwaitSuspendDestroy(
+    CodeGenFunction &CGF, CGCoroData &Coro, llvm::Function *SuspendWrapper,
+    llvm::Value *Awaiter, llvm::Value *Frame, bool AwaitSuspendCanThrow) {
+  // The wrapper receives the awaiter and the coroutine frame, in that order.
+  SmallVector<llvm::Value *, 2> WrapperArgs{Awaiter, Frame};
+
+  // Use the nounwind call form when the suspend expression cannot throw.
+  if (!AwaitSuspendCanThrow)
+    CGF.EmitNounwindRuntimeCall(SuspendWrapper, WrapperArgs);
+  else
+    CGF.EmitCallOrInvoke(SuspendWrapper, WrapperArgs);
 
-  // Otherwise, emit suspend logic.
-  CGF.EmitBlock(SuspendBlock);
+  // Leave through the coroutine's cleanup destination.
+  CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+}
 
+static void emitStandardAwaitSuspend(
+    CodeGenFunction &CGF, CGCoroData &Coro, CoroutineSuspendExpr const &S,
+    llvm::Function *SuspendWrapper, llvm::Value *Awaiter, llvm::Value *Frame,
+    bool AwaitSuspendCanThrow, SmallString<32> Prefix, BasicBlock *ReadyBlock,
+    AwaitKind Kind, CoroutineSuspendExpr::SuspendReturnType SuspendReturnType) {
   auto &Builder = CGF.Builder;
-  llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
-  auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
-  auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
-
-  auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
-      CGF.CurFn->getName(), Prefix, S);
 
   CGF.CurCoro.InSuspendBlock = true;
 
-  assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
-         "expected to be called in coroutine context");
-
   SmallVector<llvm::Value *, 3> SuspendIntrinsicCallArgs;
-  SuspendIntrinsicCallArgs.push_back(
-      CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF));
-
-  SuspendIntrinsicCallArgs.push_back(CGF.CurCoro.Data->CoroBegin);
+  SuspendIntrinsicCallArgs.push_back(Awaiter);
+  SuspendIntrinsicCallArgs.push_back(Frame);
   SuspendIntrinsicCallArgs.push_back(SuspendWrapper);
+  BasicBlock *CleanupBlock = CGF.createBasicBlock(Prefix + Twine(".cleanup"));
 
-  const auto SuspendReturnType = S.getSuspendReturnType();
-  llvm::Intrinsic::ID AwaitSuspendIID;
+  llvm::Function *CoroSave = CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_save);
+  auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy);
+  auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr});
 
+  llvm::Intrinsic::ID AwaitSuspendIID;
   switch (SuspendReturnType) {
   case CoroutineSuspendExpr::SuspendReturnType::SuspendVoid:
     AwaitSuspendIID = llvm::Intrinsic::coro_await_suspend_void;
@@ -279,12 +342,6 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
 
   llvm::Function *AwaitSuspendIntrinsic = CGF.CGM.getIntrinsic(AwaitSuspendIID);
 
-  // SuspendHandle might throw since it also resumes the returned handle.
-  const bool AwaitSuspendCanThrow =
-      SuspendReturnType ==
-          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
-      StmtCanThrow(S.getSuspendExpr());
-
   llvm::CallBase *SuspendRet = nullptr;
   // FIXME: add call attributes?
   if (AwaitSuspendCanThrow)
@@ -332,6 +389,54 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   // Emit cleanup for this suspend point.
   CGF.EmitBlock(CleanupBlock);
   CGF.EmitBranchThroughCleanup(Coro.CleanupJD);
+}
+
+static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro,
+                                    CoroutineSuspendExpr const &S,
+                                    AwaitKind Kind, AggValueSlot aggSlot,
+                                    bool ignoreResult, bool forLValue) {
+  auto *E = S.getCommonExpr();
+
+  auto CommonBinder =
+      CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E);
+  auto UnbindCommonOnExit =
+      llvm::make_scope_exit([&] { CommonBinder.unbind(CGF); });
+
+  auto Prefix = buildSuspendPrefixStr(Coro, Kind);
+  BasicBlock *ReadyBlock = CGF.createBasicBlock(Prefix + Twine(".ready"));
+  BasicBlock *SuspendBlock = CGF.createBasicBlock(Prefix + Twine(".suspend"));
+
+  // If expression is ready, no need to suspend.
+  CGF.EmitBranchOnBoolExpr(S.getReadyExpr(), ReadyBlock, SuspendBlock, 0);
+
+  // Otherwise, emit suspend logic.
+  CGF.EmitBlock(SuspendBlock);
+
+  auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper(
+      CGF.CurFn->getName(), Prefix, S);
+
+  assert(CGF.CurCoro.Data && CGF.CurCoro.Data->CoroBegin &&
+         "expected to be called in coroutine context");
+
+  // SuspendHandle might throw since it also resumes the returned handle.
+  const auto SuspendReturnType = S.getSuspendReturnType();
+  const bool AwaitSuspendCanThrow =
+      SuspendReturnType ==
+          CoroutineSuspendExpr::SuspendReturnType::SuspendHandle ||
+      StmtCanThrow(S.getSuspendExpr());
+
+  llvm::Value *Awaiter =
+      CGF.getOrCreateOpaqueLValueMapping(S.getOpaqueValue()).getPointer(CGF);
+  llvm::Value *Frame = CGF.CurCoro.Data->CoroBegin;
+
+  if (useCoroAwaitSuspendDestroy(S)) { // Call `await_suspend_destroy` & cleanup
+    emitAwaitSuspendDestroy(CGF, Coro, SuspendWrapper, Awaiter, Frame,
+                            AwaitSuspendCanThrow);
+  } else { // Normal suspend path -- can actually suspend, uses intrinsics
+    emitStandardAwaitSuspend(CGF, Coro, S, SuspendWrapper, Awaiter, Frame,
+                             AwaitSuspendCanThrow, Prefix, ReadyBlock, Kind,
+                             SuspendReturnType);
+  }
 
   // Emit await_resume expression.
   CGF.EmitBlock(ReadyBlock);
@@ -341,6 +446,7 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co
   CXXTryStmt *TryStmt = nullptr;
   if (Coro.ExceptionHandler && Kind == AwaitKind::Init &&
       StmtCanThrow(S.getResumeExpr())) {
+    auto &Builder = CGF.Builder;
     Coro.ResumeEHVar =
         CGF.CreateTempAlloca(Builder.getInt1Ty(), Prefix + Twine("resume.eh"));
     Builder.CreateFlagStore(true, Coro.ResumeEHVar);