-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[CoroSplit] Rise lifetime.end to get smaller frame size #143333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-clang Author: Weibo He (NewSigma) ChangesLifetime markers determine whether we place alloca on the frame or on the stack. We can move Close #49716 Full diff: https://github.com/llvm/llvm-project/pull/143333.diff 7 Files Affected:
diff --git a/clang/test/CodeGenCoroutines/pr56919.cpp b/clang/test/CodeGenCoroutines/pr56919.cpp
index baa8c27ce6649..e709cecf6d93a 100644
--- a/clang/test/CodeGenCoroutines/pr56919.cpp
+++ b/clang/test/CodeGenCoroutines/pr56919.cpp
@@ -111,15 +111,15 @@ Task<void> Bar() { co_await Baz(); }
// CHECK: _Z3Quxv.destroy:{{.*}}
// CHECK-NEXT: #
-// CHECK-NEXT: movl $40, %esi
+// CHECK-NEXT: movl $32, %esi
// CHECK-NEXT: jmp _ZdlPvm@PLT
// CHECK: _Z3Bazv.destroy:{{.*}}
// CHECK-NEXT: #
-// CHECK-NEXT: movl $80, %esi
+// CHECK-NEXT: movl $64, %esi
// CHECK-NEXT: jmp _ZdlPvm
// CHECK: _Z3Barv.destroy:{{.*}}
// CHECK-NEXT: #
-// CHECK-NEXT: movl $120, %esi
+// CHECK-NEXT: movl $96, %esi
// CHECK-NEXT: jmp _ZdlPvm
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 53d78edda2e9f..fb56832dec9e3 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -18,6 +18,7 @@
#include "CoroInternal.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackLifetime.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
@@ -1767,6 +1768,18 @@ static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
}
}
+static bool isLifetimeStart(Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
+static bool isLifetimeEnd(Instruction *I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() == Intrinsic::lifetime_end;
+ return false;
+}
+
/// For each local variable that all of its user are only used inside one of
/// suspended region, we sink their lifetime.start markers to the place where
/// after the suspend block. Doing so minimizes the lifetime of each variable,
@@ -1819,7 +1832,7 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
for (User *U : AI->users()) {
Instruction *UI = cast<Instruction>(U);
- // For all users except lifetime.start markers, if they are all
+ // For all users except lifetime markers, if they are all
// dominated by one of the basic blocks and do not cross
// suspend points as well, then there is no need to spill the
// instruction.
@@ -1827,7 +1840,7 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
// Skip lifetime.start, GEP and bitcast used by lifetime.start
// markers.
- if (collectLifetimeStart(UI, AI))
+ if (collectLifetimeStart(UI, AI) || isLifetimeEnd(UI))
continue;
Valid = false;
break;
@@ -1850,6 +1863,78 @@ static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
}
}
+static bool mayEscape(Value *V, User *U) {
+ if (V == U->stripInBoundsOffsets() || isa<PHINode>(U))
+ return true;
+
+ if (auto *SI = dyn_cast<StoreInst>(U))
+ return SI->getValueOperand() == V;
+
+ if (auto *CB = dyn_cast<CallBase>(U)) {
+ unsigned OpCount = CB->arg_size();
+ for (unsigned Op = 0; Op < OpCount; ++Op)
+ if (V == CB->getArgOperand(Op) && !CB->doesNotCapture(Op))
+ return true;
+ }
+ return false;
+}
+
+// Find the suspend point that dominate all uses of alloca,
+// we will rise lifetime.end markers to the end of corresponding save block.
+static void riseLifetimeEndMarkers(Function &F, const coro::Shape &Shape) {
+ const PostDominatorTree PDT(F);
+ for (Instruction &I : instructions(F)) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(&I);
+ if (!AI)
+ continue;
+
+ SmallVector<Instruction *, 2> LifetimeEnds;
+ SmallPtrSet<BasicBlock *, 2> UserBBs{};
+ bool Escape = false;
+ for (User *U : AI->users()) {
+ auto *I = cast<Instruction>(U);
+ // lifetime markers are not actual uses
+ if (isLifetimeStart(I))
+ continue;
+
+ if (isLifetimeEnd(I))
+ LifetimeEnds.push_back(I);
+ else if (mayEscape(AI, U)) {
+ Escape = true;
+ break;
+ }
+ else
+ UserBBs.insert(I->getParent());
+ }
+
+ // Lifetime is unbounded if no lifetime.end
+ if (LifetimeEnds.empty() || Escape)
+ continue;
+
+ BasicBlock *DomBB = nullptr;
+ for (auto *Suspend : Shape.CoroSuspends) {
+ bool DomAll = llvm::all_of(UserBBs, [&](BasicBlock *UserBB) {
+ return PDT.dominates(Suspend->getParent(), UserBB);
+ });
+
+ if (DomAll) {
+ DomBB = Suspend->getParent();
+ break;
+ }
+ }
+
+ if (DomBB != nullptr) {
+ assert(coro::isSuspendBlock(DomBB));
+ auto *SaveBB = DomBB->getSinglePredecessor();
+ auto *NewEnd = LifetimeEnds[0]->clone();
+ NewEnd->insertBefore(SaveBB->getTerminator()->getIterator());
+
+ for (auto *I : LifetimeEnds)
+ I->eraseFromParent();
+ }
+ }
+}
+
static std::optional<std::pair<Value &, DIExpression &>>
salvageDebugInfoImpl(SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
bool UseEntryValue, Function *F, Value *Storage,
@@ -2070,9 +2155,10 @@ void coro::BaseABI::buildCoroutineFrame(bool OptimizeFrame) {
doRematerializations(F, Checker, IsMaterializable);
const DominatorTree DT(F);
- if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
- Shape.ABI != coro::ABI::RetconOnce)
+ if (!F.hasOptNone() && Shape.ABI == coro::ABI::Switch) {
sinkLifetimeStartMarkers(F, Shape, Checker, DT);
+ riseLifetimeEndMarkers(F, Shape);
+ }
// All values (that are not allocas) that needs to be spilled to the frame.
coro::SpillInfo Spills;
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll
index 89149ceba4c14..ecb42118ca3e9 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-06.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-06.ll
@@ -51,6 +51,8 @@ suspend:
; CHECK-NEXT: store ptr [[TMP2]], ptr [[TMP0]], align 8
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[TMP1]])
; CHECK-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
+; CHECK-NEXT: %index.addr1 = getelementptr inbounds nuw %f.Frame, ptr %hdl, i32 0, i32 2
+; CHECK-NEXT: store i1 false, ptr %index.addr1, align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[TMP1]])
;
diff --git a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
index 3b0acdd794af4..def6cde84cb35 100644
--- a/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
+++ b/llvm/test/Transforms/Coroutines/coro-alloca-07.ll
@@ -31,6 +31,8 @@ resume:
br label %cleanup
cleanup:
+ call void @llvm.lifetime.end.p0(i64 8, ptr %x)
+ call void @llvm.lifetime.end.p0(i64 8, ptr %y)
%mem = call ptr @llvm.coro.free(token %id, ptr %hdl)
call void @free(ptr %mem)
br label %suspend
diff --git a/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-01.ll b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-01.ll
new file mode 100644
index 0000000000000..577d822a6c6d1
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-01.ll
@@ -0,0 +1,39 @@
+; Test allocas that do not cross suspension point will not go to frame
+; RUN: opt < %s -passes='cgscc(coro-split),early-cse' -S | FileCheck %s
+
+; CHECK: %large.alloca = alloca [500 x i8], align 16
+; CHECK-NOT: %large.alloca.reload.addr
+
+define void @f() presplitcoroutine {
+entry:
+ %large.alloca = alloca [500 x i8], align 16
+ %id = call token @llvm.coro.id(i32 16, ptr null, ptr null, ptr null)
+ %size = call i32 @llvm.coro.size.i32()
+ %mem = call ptr @malloc(i32 %size)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem)
+ call void @llvm.lifetime.start.p0(i64 500, ptr %large.alloca)
+ %value = load i8, ptr %large.alloca, align 1
+ call void @consume(i8 %value)
+ %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ switch i8 %0, label %exit [
+ i8 0, label %suspend
+ i8 1, label %cleanup
+ ]
+
+suspend:
+ br label %cleanup
+
+cleanup:
+ call void @llvm.lifetime.end.p0(i64 500, ptr %large.alloca)
+ %1 = call ptr @llvm.coro.free(token %id, ptr %mem)
+ call void @free(ptr %mem)
+ br label %exit
+
+exit:
+ %2 = call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ ret void
+}
+
+declare void @consume(i8)
+declare ptr @malloc(i32)
+declare void @free(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-02.ll b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-02.ll
new file mode 100644
index 0000000000000..b4d3cb4f07d08
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-02.ll
@@ -0,0 +1,61 @@
+; Test that we rise lifetime markers to the correct place if there are many suspend points.
+; RUN: opt < %s -passes='cgscc(coro-split),early-cse' -S | FileCheck %s
+
+; CHECK: %large.alloca = alloca [500 x i8], align 16
+; CHECK-NOT: %large.alloca.reload.addr
+
+define void @many_suspend() presplitcoroutine {
+entry:
+ %large.alloca = alloca [500 x i8], align 16
+ %id = call token @llvm.coro.id(i32 16, ptr null, ptr null, ptr null)
+ %size = call i64 @llvm.coro.size.i64()
+ %call = call noalias ptr @malloc(i64 %size)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr %call)
+ call void @llvm.lifetime.start.p0(i64 500, ptr %large.alloca)
+ %save1 = call token @llvm.coro.save(ptr null)
+ %sp1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
+ switch i8 %sp1, label %coro.ret [
+ i8 0, label %await.ready
+ i8 1, label %cleanup
+ ]
+
+await.ready:
+ %save2 = call token @llvm.coro.save(ptr null)
+ %sp2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
+ switch i8 %sp2, label %coro.ret [
+ i8 0, label %await2.ready
+ i8 1, label %cleanup
+ ]
+
+await2.ready:
+ %value = load i8, ptr %large.alloca, align 1
+ call void @consume(i8 %value)
+ %save3 = call token @llvm.coro.save(ptr null)
+ %sp3 = call i8 @llvm.coro.suspend(token %save3, i1 false)
+ switch i8 %sp3, label %coro.ret [
+ i8 0, label %await3.ready
+ i8 1, label %cleanup
+ ]
+
+await3.ready:
+ %save4 = call token @llvm.coro.save(ptr null)
+ %sp4 = call i8 @llvm.coro.suspend(token %save4, i1 false)
+ switch i8 %sp4, label %coro.ret [
+ i8 0, label %cleanup
+ i8 1, label %cleanup
+ ]
+
+cleanup:
+ call void @llvm.lifetime.end.p0(i64 500, ptr %large.alloca)
+ %mem1 = call ptr @llvm.coro.free(token %id, ptr %hdl)
+ call void @free(ptr %mem1)
+ br label %coro.ret
+
+coro.ret:
+ %InResumePart = call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ ret void
+}
+
+declare void @consume(i8)
+declare ptr @malloc(i64)
+declare void @free(ptr)
diff --git a/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-03.ll b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-03.ll
new file mode 100644
index 0000000000000..5ea3c47bf9989
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-split-rise-lifetime-03.ll
@@ -0,0 +1,62 @@
+; Test we do not rise lifetime.end for allocas that may escape
+; RUN: opt < %s -passes='cgscc(coro-split),early-cse' -S | FileCheck %s
+
+; CHECK-NOT: %escape.gep = alloca [500 x i8], align 16
+; CHECK: %escape.gep.reload.addr
+
+; CHECK-NOT: %escape.store = alloca [500 x i8], align 16
+; CHECK: %escape.store.reload.addr
+
+; CHECK-NOT: %escape.call = alloca [500 x i8], align 16
+; CHECK: %escape.call.reload.addr
+
+define void @fn() presplitcoroutine {
+entry:
+ %id = call token @llvm.coro.id(i32 16, ptr null, ptr null, ptr null)
+ %size = call i64 @llvm.coro.size.i64()
+ %mem = call ptr @malloc(i64 %size)
+ %hdl = call ptr @llvm.coro.begin(token %id, ptr %mem)
+
+ %escape.gep = alloca [500 x i8], align 16
+ call void @llvm.lifetime.start.p0(i64 500, ptr %escape.gep)
+ %gep.ptr = getelementptr inbounds nuw i8, ptr %escape.gep, i64 8
+
+ %escape.store = alloca [500 x i8], align 16
+ %store.ptr = alloca ptr, align 8
+ call void @llvm.lifetime.start.p0(i64 500, ptr %escape.store)
+ call void @llvm.lifetime.start.p0(i64 8, ptr %store.ptr)
+ store ptr %escape.store, ptr %store.ptr, align 8
+
+ %escape.call = alloca [500 x i8], align 16
+ call void @llvm.lifetime.start.p0(i64 500, ptr %escape.call)
+ call void @consume(ptr %escape.call)
+
+ %save = call token @llvm.coro.save(ptr null)
+ %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
+ switch i8 %suspend, label %coro.ret [
+ i8 0, label %await.ready
+ i8 1, label %cleanup
+ ]
+
+await.ready:
+ call void @consume(ptr %gep.ptr)
+ call void @consume(ptr %store.ptr)
+ br label %cleanup
+
+cleanup:
+ call void @llvm.lifetime.end.p0(i64 500, ptr %escape.gep)
+ call void @llvm.lifetime.end.p0(i64 500, ptr %escape.store)
+ call void @llvm.lifetime.end.p0(i64 500, ptr %escape.call)
+ call void @llvm.lifetime.end.p0(i64 8, ptr %store.ptr)
+ %mem1 = call ptr @llvm.coro.free(token %id, ptr %hdl)
+ call void @free(ptr %mem1)
+ br label %coro.ret
+
+coro.ret:
+ %InResumePart = call i1 @llvm.coro.end(ptr null, i1 false, token none)
+ ret void
+}
+
+declare void @consume(ptr)
+declare ptr @malloc(i64)
+declare void @free(ptr)
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
ChuanqiXu9
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If we can move lifetimes, this should be a general optimization. We don't have to (and shouldn't) put this in Coroutine passes. Let's make it a general pass and send it for review for wider visibility.
|
It sounds interesting, though I still don’t have a clear idea about it. What are the benefits for non-coroutine code, and where exactly should we place it in the pipeline? |
|
lifetimes are widely used. I don't have a clear idea for position yet. |
|
Prefer make it a general pass |
Lifetime markers determine whether we place alloca on the frame or on the stack. We can move
lifetime.endto an optimized position to reduce the coroutine frame size. I propose finding the suspend point that dominates all uses of alloca and raising thelifetime.endmarkers to the end of the corresponding save block. This should significantly reduce the frame size.Close #49716