Skip to content

Commit edd770b

Browse files
lxfindtstellar
authored andcommitted
[Coroutine] Properly deal with byval and noalias parameters
This patch is to address https://bugs.llvm.org/show_bug.cgi?id=48857. Previous attempts can be found in D104007 and D101980. A lot of discussions can be found in those two patches. To summarize the bug: When Clang emits IR for coroutines, the first thing it does is to make a copy of every argument to the local stack, so that uses of the arguments in the function will all refer to the local copies instead of the arguments directly. However, in some cases we find that arguments are still directly used: When Clang emits IR for a function that has pass-by-value arguments, sometimes it emits an argument with byval attribute. A byval attribute is considered to be local to the function (just like alloca) and hence it can be easily determined that it does not alias other values. If in the IR there exists a memcpy from a byval argument to a local alloca, and then from that local alloca to another alloca, MemCpyOpt will optimize out the first memcpy because byval argument's content will not change. This causes issues because after a coroutine suspension, the byval argument may die outside of the function, and latter uses will lead to memory use-after-free. This is only a problem for arguments with either byval attribute or noalias attribute, because only these two kinds are considered local. Arguments without these two attributes will be considered to alias coro_suspend and hence we won't have this problem. So we need to be able to deal with these two attributes in coroutines properly. For noalias arguments, since coro_suspend may potentially change the value of any argument outside of the function, we simply shouldn't mark any argument in a coroutiune as noalias. This can be taken care of in CoroEarly pass. For byval arguments, if such an argument needs to live across suspensions, we will have to copy their value content to the frame, not just the pointer. Differential Revision: https://reviews.llvm.org/D104184 (cherry picked from commit 3522167)
1 parent 1a9f4b3 commit edd770b

File tree

4 files changed

+202
-5
lines changed

4 files changed

+202
-5
lines changed

llvm/lib/Transforms/Coroutines/CoroEarly.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
149149
bool Changed = false;
150150
CoroIdInst *CoroId = nullptr;
151151
SmallVector<CoroFreeInst *, 4> CoroFrees;
152+
bool HasCoroSuspend = false;
152153
for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
153154
Instruction &I = *IB++;
154155
if (auto *CB = dyn_cast<CallBase>(&I)) {
@@ -163,6 +164,7 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
163164
// pass expects that there is at most one final suspend point.
164165
if (cast<CoroSuspendInst>(&I)->isFinal())
165166
CB->setCannotDuplicate();
167+
HasCoroSuspend = true;
166168
break;
167169
case Intrinsic::coro_end_async:
168170
case Intrinsic::coro_end:
@@ -213,6 +215,13 @@ bool Lowerer::lowerEarlyIntrinsics(Function &F) {
213215
if (CoroId)
214216
for (CoroFreeInst *CF : CoroFrees)
215217
CF->setArgOperand(0, CoroId);
218+
// Coroutine suspention could potentially lead to any argument modified
219+
// outside of the function, hence arguments should not have noalias
220+
// attributes.
221+
if (HasCoroSuspend)
222+
for (Argument &A : F.args())
223+
if (A.hasNoAliasAttr())
224+
A.removeAttr(Attribute::NoAlias);
216225
return Changed;
217226
}
218227

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,13 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
781781
PromiseAlloca, DenseMap<Instruction *, llvm::Optional<APInt>>{}, false);
782782
// Create an entry for every spilled value.
783783
for (auto &S : FrameData.Spills) {
784-
FieldIDType Id = B.addField(S.first->getType(), None);
784+
Type *FieldType = S.first->getType();
785+
// For byval arguments, we need to store the pointed value in the frame,
786+
// instead of the pointer itself.
787+
if (const Argument *A = dyn_cast<Argument>(S.first))
788+
if (A->hasByValAttr())
789+
FieldType = FieldType->getPointerElementType();
790+
FieldIDType Id = B.addField(FieldType, None);
785791
FrameData.setFieldIndex(S.first, Id);
786792
}
787793

@@ -1149,6 +1155,7 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11491155
// Create a store instruction storing the value into the
11501156
// coroutine frame.
11511157
Instruction *InsertPt = nullptr;
1158+
bool NeedToCopyArgPtrValue = false;
11521159
if (auto *Arg = dyn_cast<Argument>(Def)) {
11531160
// For arguments, we will place the store instruction right after
11541161
// the coroutine frame pointer instruction, i.e. bitcast of
@@ -1159,6 +1166,9 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11591166
// from the coroutine function.
11601167
Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
11611168

1169+
if (Arg->hasByValAttr())
1170+
NeedToCopyArgPtrValue = true;
1171+
11621172
} else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
11631173
// Don't spill immediately after a suspend; splitting assumes
11641174
// that the suspend will be followed by a branch.
@@ -1193,7 +1203,15 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
11931203
Builder.SetInsertPoint(InsertPt);
11941204
auto *G = Builder.CreateConstInBoundsGEP2_32(
11951205
FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1196-
Builder.CreateStore(Def, G);
1206+
if (NeedToCopyArgPtrValue) {
1207+
// For byval arguments, we need to store the pointed value in the frame,
1208+
// instead of the pointer itself.
1209+
auto *Value =
1210+
Builder.CreateLoad(Def->getType()->getPointerElementType(), Def);
1211+
Builder.CreateStore(Value, G);
1212+
} else {
1213+
Builder.CreateStore(Def, G);
1214+
}
11971215

11981216
BasicBlock *CurrentBlock = nullptr;
11991217
Value *CurrentReload = nullptr;
@@ -1207,9 +1225,12 @@ static Instruction *insertSpills(const FrameDataInfo &FrameData,
12071225

12081226
auto *GEP = GetFramePointer(E.first);
12091227
GEP->setName(E.first->getName() + Twine(".reload.addr"));
1210-
CurrentReload = Builder.CreateLoad(
1211-
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1212-
E.first->getName() + Twine(".reload"));
1228+
if (NeedToCopyArgPtrValue)
1229+
CurrentReload = GEP;
1230+
else
1231+
CurrentReload = Builder.CreateLoad(
1232+
FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1233+
E.first->getName() + Twine(".reload"));
12131234

12141235
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
12151236
for (DbgDeclareInst *DDI : DIs) {
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
; RUN: opt < %s -passes=coro-split -S | FileCheck %s
2+
%promise_type = type { i8 }
3+
%struct.A = type <{ i64, i64, i32, [4 x i8] }>
4+
5+
; Function Attrs: noinline ssp uwtable mustprogress
6+
define %promise_type* @foo(%struct.A* nocapture readonly byval(%struct.A) align 8 %a1) #0 {
7+
entry:
8+
%__promise = alloca %promise_type, align 1
9+
%a2 = alloca %struct.A, align 8
10+
%0 = getelementptr inbounds %promise_type, %promise_type* %__promise, i64 0, i32 0
11+
%1 = call token @llvm.coro.id(i32 16, i8* nonnull %0, i8* bitcast (%promise_type* (%struct.A*)* @foo to i8*), i8* null)
12+
%2 = call i1 @llvm.coro.alloc(token %1)
13+
br i1 %2, label %coro.alloc, label %coro.init
14+
15+
coro.alloc: ; preds = %entry
16+
%3 = call i64 @llvm.coro.size.i64()
17+
%call = call noalias nonnull i8* @_Znwm(i64 %3) #9
18+
br label %coro.init
19+
20+
coro.init: ; preds = %coro.alloc, %entry
21+
%4 = phi i8* [ null, %entry ], [ %call, %coro.alloc ]
22+
%5 = call i8* @llvm.coro.begin(token %1, i8* %4) #10
23+
%6 = bitcast %struct.A* %a1 to i8*
24+
call void @llvm.lifetime.start.p0i8(i64 1, i8* nonnull %0) #2
25+
%call2 = call %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1) %__promise)
26+
call void @initial_suspend(%promise_type* nonnull dereferenceable(1) %__promise)
27+
%7 = call token @llvm.coro.save(i8* null)
28+
call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2
29+
%8 = call i8 @llvm.coro.suspend(token %7, i1 false)
30+
switch i8 %8, label %coro.ret [
31+
i8 0, label %init.ready
32+
i8 1, label %cleanup33
33+
]
34+
35+
init.ready: ; preds = %coro.init
36+
%9 = bitcast %struct.A* %a2 to i8*
37+
call void @llvm.lifetime.start.p0i8(i64 24, i8* nonnull %9) #2
38+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %6, i64 24, i1 false)
39+
call void @llvm.lifetime.end.p0i8(i64 24, i8* nonnull %9) #2
40+
call void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1) %__promise) #2
41+
%10 = call token @llvm.coro.save(i8* null)
42+
call fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8* %5) #2
43+
%11 = call i8 @llvm.coro.suspend(token %10, i1 true) #10
44+
%switch = icmp ult i8 %11, 2
45+
br i1 %switch, label %cleanup33, label %coro.ret
46+
47+
cleanup33: ; preds = %init.ready, %coro.init
48+
call void @llvm.lifetime.end.p0i8(i64 1, i8* nonnull %0) #2
49+
%12 = call i8* @llvm.coro.free(token %1, i8* %5)
50+
%.not = icmp eq i8* %12, null
51+
br i1 %.not, label %coro.ret, label %coro.free
52+
53+
coro.free: ; preds = %cleanup33
54+
call void @_ZdlPv(i8* nonnull %12) #2
55+
br label %coro.ret
56+
57+
coro.ret: ; preds = %coro.free, %cleanup33, %init.ready, %coro.init
58+
%13 = call i1 @llvm.coro.end(i8* null, i1 false) #10
59+
ret %promise_type* %call2
60+
}
61+
62+
; check that the frame contains the entire struct, instead of just the struct pointer
63+
; CHECK: %foo.Frame = type { void (%foo.Frame*)*, void (%foo.Frame*)*, %promise_type, %struct.A, i1 }
64+
65+
; Function Attrs: argmemonly nounwind readonly
66+
declare token @llvm.coro.id(i32, i8* readnone, i8* nocapture readonly, i8*) #1
67+
68+
; Function Attrs: nounwind
69+
declare i1 @llvm.coro.alloc(token) #2
70+
71+
; Function Attrs: nobuiltin nofree allocsize(0)
72+
declare nonnull i8* @_Znwm(i64) local_unnamed_addr #3
73+
74+
; Function Attrs: nounwind readnone
75+
declare i64 @llvm.coro.size.i64() #4
76+
77+
; Function Attrs: nounwind
78+
declare i8* @llvm.coro.begin(token, i8* writeonly) #2
79+
80+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
81+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
82+
83+
; Function Attrs: argmemonly nofree nounwind willreturn
84+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #6
85+
86+
; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress
87+
declare %promise_type* @_ZN4task12promise_type17get_return_objectEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2
88+
89+
; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress
90+
declare void @initial_suspend(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2
91+
92+
; Function Attrs: nounwind
93+
declare token @llvm.coro.save(i8*) #2
94+
95+
; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress
96+
declare hidden fastcc void @_ZNSt12experimental13coroutines_v116coroutine_handleIN4task12promise_typeEE12from_addressEPv(i8*) unnamed_addr #7 align 2
97+
98+
; Function Attrs: argmemonly nofree nosync nounwind willreturn
99+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
100+
101+
; Function Attrs: nounwind
102+
declare i8 @llvm.coro.suspend(token, i1) #2
103+
104+
; Function Attrs: noinline nounwind ssp uwtable willreturn mustprogress
105+
declare void @_ZN4task12promise_type13final_suspendEv(%promise_type* nonnull dereferenceable(1)) local_unnamed_addr #7 align 2
106+
107+
; Function Attrs: nounwind
108+
declare i1 @llvm.coro.end(i8*, i1) #2
109+
110+
; Function Attrs: nobuiltin nounwind
111+
declare void @_ZdlPv(i8*) local_unnamed_addr #8
112+
113+
; Function Attrs: argmemonly nounwind readonly
114+
declare i8* @llvm.coro.free(token, i8* nocapture readonly) #1
115+
116+
attributes #0 = { noinline ssp uwtable mustprogress "coroutine.presplit"="1" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
117+
attributes #1 = { argmemonly nounwind readonly }
118+
attributes #2 = { nounwind }
119+
attributes #3 = { nobuiltin nofree allocsize(0) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
120+
attributes #4 = { nounwind readnone }
121+
attributes #5 = { argmemonly nofree nosync nounwind willreturn }
122+
attributes #6 = { argmemonly nofree nounwind willreturn }
123+
attributes #7 = { noinline nounwind ssp uwtable willreturn mustprogress "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
124+
attributes #8 = { nobuiltin nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+sse4.1,+ssse3,+x87" "tune-cpu"="generic" }
125+
attributes #9 = { allocsize(0) }
126+
attributes #10 = { noduplicate }
127+
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; RUN: opt < %s -S -passes=coro-early | FileCheck %s
2+
%struct.A = type <{ i64, i64, i32, [4 x i8] }>
3+
4+
define void @f(%struct.A* nocapture readonly noalias align 8 %a) {
5+
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
6+
%size = call i32 @llvm.coro.size.i32()
7+
%alloc = call i8* @malloc(i32 %size)
8+
%hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
9+
call void @print(i32 0)
10+
%s1 = call i8 @llvm.coro.suspend(token none, i1 false)
11+
switch i8 %s1, label %suspend [i8 0, label %resume
12+
i8 1, label %cleanup]
13+
resume:
14+
call void @print(i32 1)
15+
br label %cleanup
16+
17+
cleanup:
18+
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
19+
call void @free(i8* %mem)
20+
br label %suspend
21+
suspend:
22+
call i1 @llvm.coro.end(i8* %hdl, i1 0)
23+
ret void
24+
}
25+
26+
; check that the noalias attribute is removed from the argument
27+
; CHECK: define void @f(%struct.A* nocapture readonly align 8 %a)
28+
29+
declare token @llvm.coro.id(i32, i8*, i8*, i8*)
30+
declare i8* @llvm.coro.begin(token, i8*)
31+
declare i8* @llvm.coro.free(token, i8*)
32+
declare i32 @llvm.coro.size.i32()
33+
declare i8 @llvm.coro.suspend(token, i1)
34+
declare void @llvm.coro.resume(i8*)
35+
declare void @llvm.coro.destroy(i8*)
36+
declare i1 @llvm.coro.end(i8*, i1)
37+
38+
declare noalias i8* @malloc(i32)
39+
declare void @print(i32)
40+
declare void @free(i8*)

0 commit comments

Comments
 (0)