Skip to content

Commit 1927a19

Browse files
[SROA] Rewrite invariant group intrinsics after splitting alloca
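Fix a miscompilation by rewriting launder/strip invariant group intrinsics, and the memory intrinsics that use them, to refer to the new alloca slices after an alloca is split. Fixes: #105537.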
1 parent efc6d33 commit 1927a19

File tree

3 files changed

+110
-23
lines changed

3 files changed

+110
-23
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 70 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2656,6 +2656,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
26562656
SmallSetVector<PHINode *, 8> &PHIUsers;
26572657
SmallSetVector<SelectInst *, 8> &SelectUsers;
26582658

2659+
// Track the rewritten invariant group intrinsic so that its memory intrinsic users can be rewritten to use it.
2660+
std::optional<Value *> InvariantIntr;
2661+
26592662
// Utility IR builder, whose name prefix is setup for each visited use, and
26602663
// the insertion point is set to point to the user.
26612664
IRBuilderTy IRB;
@@ -2789,6 +2792,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
27892792
);
27902793
}
27912794

2795+
// Return getNewAllocaSlicePtr, unless the new alloca ptr has passed through
2796+
// an invariant group intrinsic, in which case return that pointer instead.
2797+
Value *getInvariantGroupPtrOrNewAllocaSlicePtr(IRBuilderTy &IRB,
2798+
Instruction *I) {
2799+
// Get the newly-rewritten invariant or the current one, if it has been
2800+
// rewritten in previous iterations or the alloca was not split further.
2801+
if (isa<IntrinsicInst>(I) &&
2802+
cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
2803+
return InvariantIntr ? *InvariantIntr : I;
2804+
return getNewAllocaSlicePtr(IRB, I->getType());
2805+
}
2806+
2807+
// Return getPtrToNewAI, unless the new alloca ptr has passed through
2808+
// an invariant group intrinsic, in which case return that pointer instead.
2809+
Value *getInvariantGroupPtrOrPtrToNewAI(Instruction *I, unsigned AddrSpace,
2810+
bool IsVolatile) {
2811+
if (isa<IntrinsicInst>(I) &&
2812+
cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
2813+
return InvariantIntr ? *InvariantIntr : I;
2814+
return getPtrToNewAI(AddrSpace, IsVolatile);
2815+
}
2816+
27922817
/// Compute suitable alignment to access this slice of the *new*
27932818
/// alloca.
27942819
///
@@ -3146,7 +3171,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
31463171
if (!isa<ConstantInt>(II.getLength())) {
31473172
assert(!IsSplit);
31483173
assert(NewBeginOffset == BeginOffset);
3149-
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3174+
II.setDest(getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr));
31503175
II.setDestAlignment(getSliceAlign());
31513176
// In theory we should call migrateDebugInfo here. However, we do not
31523177
// emit dbg.assign intrinsics for mem intrinsics storing through non-
@@ -3187,8 +3212,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
31873212
unsigned Sz = NewEndOffset - NewBeginOffset;
31883213
Constant *Size = ConstantInt::get(SizeTy, Sz);
31893214
MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3190-
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3191-
MaybeAlign(getSliceAlign()), II.isVolatile()));
3215+
getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr), II.getValue(),
3216+
Size, MaybeAlign(getSliceAlign()), II.isVolatile()));
31923217
if (AATags)
31933218
New->setAAMetadata(
31943219
AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
@@ -3261,7 +3286,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
32613286
V = convertValue(DL, IRB, V, AllocaTy);
32623287
}
32633288

3264-
Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3289+
Value *NewPtr = getInvariantGroupPtrOrPtrToNewAI(
3290+
OldPtr, II.getDestAddressSpace(), II.isVolatile());
32653291
StoreInst *New =
32663292
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
32673293
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3372,28 +3398,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
33723398
OtherAlign =
33733399
commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
33743400

3375-
if (EmitMemCpy) {
3376-
// Compute the other pointer, folding as much as possible to produce
3377-
// a single, simple GEP in most cases.
3378-
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3379-
OtherPtr->getName() + ".");
3401+
// Compute the other pointer, folding as much as possible to produce
3402+
// a single, simple GEP in most cases.
3403+
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3404+
OtherPtr->getName() + ".");
33803405

3381-
Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3406+
if (EmitMemCpy) {
3407+
Value *OutPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
33823408
Type *SizeTy = II.getLength()->getType();
33833409
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
33843410

33853411
Value *DestPtr, *SrcPtr;
33863412
MaybeAlign DestAlign, SrcAlign;
33873413
// Note: IsDest is true iff we're copying into the new alloca slice
33883414
if (IsDest) {
3389-
DestPtr = OurPtr;
3415+
DestPtr = OutPtr;
33903416
DestAlign = SliceAlign;
33913417
SrcPtr = OtherPtr;
33923418
SrcAlign = OtherAlign;
33933419
} else {
33943420
DestPtr = OtherPtr;
33953421
DestAlign = OtherAlign;
3396-
SrcPtr = OurPtr;
3422+
SrcPtr = OutPtr;
33973423
SrcAlign = SliceAlign;
33983424
}
33993425
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
@@ -3438,8 +3464,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
34383464
OtherTy = NewAllocaTy;
34393465
}
34403466

3441-
Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3442-
OtherPtr->getName() + ".");
34433467
MaybeAlign SrcAlign = OtherAlign;
34443468
MaybeAlign DstAlign = SliceAlign;
34453469
if (!IsDest)
@@ -3449,11 +3473,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
34493473
Value *DstPtr;
34503474

34513475
if (IsDest) {
3452-
DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3453-
SrcPtr = AdjPtr;
3476+
DstPtr = getInvariantGroupPtrOrPtrToNewAI(
3477+
OldPtr, II.getDestAddressSpace(), II.isVolatile());
3478+
SrcPtr = OtherPtr;
34543479
} else {
3455-
DstPtr = AdjPtr;
3456-
SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3480+
DstPtr = OtherPtr;
3481+
SrcPtr = getInvariantGroupPtrOrPtrToNewAI(
3482+
OldPtr, II.getSourceAddressSpace(), II.isVolatile());
34573483
}
34583484

34593485
Value *Src;
@@ -3531,8 +3557,33 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
35313557
return true;
35323558
}
35333559

3534-
if (II.isLaunderOrStripInvariantGroup())
3560+
if (II.isLaunderOrStripInvariantGroup()) {
3561+
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3562+
Value *New = nullptr;
3563+
if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
3564+
New = IRB.CreateLaunderInvariantGroup(AdjustedPtr);
3565+
else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
3566+
New = IRB.CreateStripInvariantGroup(AdjustedPtr);
3567+
3568+
if (&OldAI == &NewAI) {
3569+
New->takeName(&II);
3570+
II.replaceAllUsesWith(New);
3571+
} else {
3572+
// If the alloca can be split further, memory intrinsics using the
3573+
// invariant group may also need to be rewritten. Record the invariant
3574+
// for when the memory intrinsic is later visited.
3575+
for (Use &U : II.uses())
3576+
if (isa<MemIntrinsic>(U.getUser())) {
3577+
if (!InvariantIntr)
3578+
InvariantIntr = New;
3579+
continue;
3580+
} else {
3581+
U.set(New);
3582+
}
3583+
}
3584+
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
35353585
return true;
3586+
}
35363587

35373588
assert(II.getArgOperand(1) == OldPtr);
35383589
// Lifetime intrinsics are only promotable if they cover the whole alloca.

llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
8181
return false;
8282
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
8383
if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
84-
II->getIntrinsicID() != Intrinsic::fake_use)
84+
II->getIntrinsicID() != Intrinsic::fake_use &&
85+
!II->isLaunderOrStripInvariantGroup())
8586
return false;
8687
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
8788
if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
@@ -491,9 +492,9 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
491492
}
492493

493494
if (!I->getType()->isVoidTy()) {
494-
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
495-
// Follow the use/def chain to erase them now instead of leaving it for
496-
// dead code elimination later.
495+
// The only users of this bitcast/GEP instruction are lifetime intrinsics,
496+
// fake_use as well as invariant group ones. Follow the use/def chain to
497+
// erase them now instead of leaving it for dead code elimination later.
497498
for (Use &UU : llvm::make_early_inc_range(I->uses())) {
498499
Instruction *Inst = cast<Instruction>(UU.getUser());
499500

llvm/test/Transforms/SROA/invariant-group.ll

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ define void @partial_promotion_of_alloca() {
142142
; CHECK-LABEL: @partial_promotion_of_alloca(
143143
; CHECK-NEXT: [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4
144144
; CHECK-NEXT: store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4
145+
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR_SROA_2]])
145146
; CHECK-NEXT: [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4
146147
; CHECK-NEXT: ret void
147148
;
@@ -155,6 +156,40 @@ define void @partial_promotion_of_alloca() {
155156
ret void
156157
}
157158

159+
define void @memcpy_after_laundering_alloca(ptr %ptr) {
160+
; CHECK-LABEL: @memcpy_after_laundering_alloca(
161+
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8
162+
; CHECK-NEXT: [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]])
163+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false)
164+
; CHECK-NEXT: ret void
165+
;
166+
%alloca = alloca { i64, i64 }, align 8
167+
%launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
168+
call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
169+
ret void
170+
}
171+
172+
define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
173+
; CHECK-LABEL: @memcpy_after_laundering_alloca_slices(
174+
; CHECK-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca [16 x i8], align 8
175+
; CHECK-NEXT: [[ALLOCA_SROA_3:%.*]] = alloca [16 x i8], align 8
176+
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_0]])
177+
; CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_3]])
178+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr align 1 [[PTR:%.*]], i64 16, i1 false)
179+
; CHECK-NEXT: [[ALLOCA_SROA_2_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
180+
; CHECK-NEXT: [[ALLOCA_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[ALLOCA_SROA_2_0_PTR_SROA_IDX]], align 1
181+
; CHECK-NEXT: [[ALLOCA_SROA_3_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 24
182+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 1 [[ALLOCA_SROA_3_0_PTR_SROA_IDX]], i64 16, i1 false)
183+
; CHECK-NEXT: ret void
184+
;
185+
%alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
186+
%launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
187+
%gep = getelementptr i8, ptr %launder, i64 16
188+
store i64 0, ptr %gep
189+
call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
190+
ret void
191+
}
192+
158193
declare void @use(ptr)
159194

160195
!0 = !{}

0 commit comments

Comments
 (0)