From 1927a199a59a2a61b538a2ff1b0f7477ae9bfa56 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto
Date: Fri, 6 Sep 2024 11:03:59 +0200
Subject: [PATCH] [SROA] Rewrite invariant group intrinsics after splitting
 alloca

SROA used to discard llvm.launder.invariant.group and
llvm.strip.invariant.group intrinsics when splitting an alloca: memory
intrinsics accessing the alloca through the laundered pointer were
rewritten to use the raw pointer of the new alloca slice, dropping the
invariant group and miscompiling the program. Rewrite the launder/strip
intrinsic against the new alloca (or each of its slices), record the
rewritten intrinsic, and reuse it when rewriting its memory intrinsic
users. isAllocaPromotable now also tolerates launder/strip users, whose
cleanup is handled by removeIntrinsicUsers, so that such allocas can
still be promoted.
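A reduced example of the miscompile (adapted from the new
memcpy_after_laundering_alloca test added below):

  %alloca = alloca { i64, i64 }, align 8
  %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
  call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)

SROA would rewrite the memcpy to store through %alloca directly,
leaving the launder dead and losing the invariant group; with this
patch, the launder is re-emitted on the rewritten alloca (or on each
of its slices) and the rewritten memcpy keeps going through it.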
Fixes: https://github.com/llvm/llvm-project/issues/105537.
---
 llvm/lib/Transforms/Scalar/SROA.cpp           | 89 +++++++++++++++----
 .../Utils/PromoteMemoryToRegister.cpp         |  9 +-
 llvm/test/Transforms/SROA/invariant-group.ll  | 35 ++++++++
 3 files changed, 110 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 92589ab17da31..6cfbefb70608e 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -2656,6 +2656,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
   SmallSetVector<PHINode *, 8> &PHIUsers;
   SmallSetVector<SelectInst *, 8> &SelectUsers;

+  // Track invariant intrinsic for rewriting its memory intrinsic users.
+  std::optional<Value *> InvariantIntr;
+
   // Utility IR builder, whose name prefix is setup for each visited use, and
   // the insertion point is set to point to the user.
   IRBuilderTy IRB;
@@ -2789,6 +2792,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     );
   }

+  // Return getNewAllocaSlicePtr, unless the new alloca ptr has passed through
+  // an invariant group intrinsic, in which case return that pointer.
+  Value *getInvariantGroupPtrOrNewAllocaSlicePtr(IRBuilderTy &IRB,
+                                                 Instruction *I) {
+    // Get the newly-rewritten invariant or the current one, if it has been
+    // rewritten in previous iterations or the alloca was not split further.
+    if (isa<IntrinsicInst>(I) &&
+        cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+      return InvariantIntr ? *InvariantIntr : I;
+    return getNewAllocaSlicePtr(IRB, I->getType());
+  }
+
+  // Return getPtrToNewAI, unless the new alloca ptr has passed through an
+  // invariant group intrinsic, in which case return that pointer.
+  Value *getInvariantGroupPtrOrPtrToNewAI(Instruction *I, unsigned AddrSpace,
+                                          bool IsVolatile) {
+    if (isa<IntrinsicInst>(I) &&
+        cast<IntrinsicInst>(I)->isLaunderOrStripInvariantGroup())
+      return InvariantIntr ? *InvariantIntr : I;
+    return getPtrToNewAI(AddrSpace, IsVolatile);
+  }
+
   /// Compute suitable alignment to access this slice of the *new*
   /// alloca.
   ///
@@ -3146,7 +3171,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     if (!isa<ConstantInt>(II.getLength())) {
       assert(!IsSplit);
       assert(NewBeginOffset == BeginOffset);
-      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
+      II.setDest(getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr));
       II.setDestAlignment(getSliceAlign());
       // In theory we should call migrateDebugInfo here. However, we do not
       // emit dbg.assign intrinsics for mem intrinsics storing through non-
@@ -3187,8 +3212,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     unsigned Sz = NewEndOffset - NewBeginOffset;
     Constant *Size = ConstantInt::get(SizeTy, Sz);
     MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
-        getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
-        MaybeAlign(getSliceAlign()), II.isVolatile()));
+        getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr), II.getValue(),
+        Size, MaybeAlign(getSliceAlign()), II.isVolatile()));
     if (AATags)
       New->setAAMetadata(
           AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
@@ -3261,7 +3286,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       V = convertValue(DL, IRB, V, AllocaTy);
     }

-    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
+    Value *NewPtr = getInvariantGroupPtrOrPtrToNewAI(
+        OldPtr, II.getDestAddressSpace(), II.isVolatile());
     StoreInst *New =
         IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
     New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
@@ -3372,13 +3398,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     OtherAlign =
         commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

-    if (EmitMemCpy) {
-      // Compute the other pointer, folding as much as possible to produce
-      // a single, simple GEP in most cases.
-      OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
-                                OtherPtr->getName() + ".");
+    // Compute the other pointer, folding as much as possible to produce
+    // a single, simple GEP in most cases.
+    OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
+                              OtherPtr->getName() + ".");

-      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+    if (EmitMemCpy) {
+      Value *OutPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
       Type *SizeTy = II.getLength()->getType();
       Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

@@ -3386,14 +3412,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       MaybeAlign DestAlign, SrcAlign;
       // Note: IsDest is true iff we're copying into the new alloca slice
       if (IsDest) {
-        DestPtr = OurPtr;
+        DestPtr = OutPtr;
         DestAlign = SliceAlign;
         SrcPtr = OtherPtr;
         SrcAlign = OtherAlign;
       } else {
         DestPtr = OtherPtr;
         DestAlign = OtherAlign;
-        SrcPtr = OurPtr;
+        SrcPtr = OutPtr;
         SrcAlign = SliceAlign;
       }
       CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
@@ -3438,8 +3464,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       OtherTy = NewAllocaTy;
     }

-    Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
-                                   OtherPtr->getName() + ".");
     MaybeAlign SrcAlign = OtherAlign;
     MaybeAlign DstAlign = SliceAlign;
     if (!IsDest)
@@ -3449,11 +3473,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
     Value *DstPtr;

     if (IsDest) {
-      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
-      SrcPtr = AdjPtr;
+      DstPtr = getInvariantGroupPtrOrPtrToNewAI(
+          OldPtr, II.getDestAddressSpace(), II.isVolatile());
+      SrcPtr = OtherPtr;
     } else {
-      DstPtr = AdjPtr;
-      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
+      DstPtr = OtherPtr;
+      SrcPtr = getInvariantGroupPtrOrPtrToNewAI(
+          OldPtr, II.getSourceAddressSpace(), II.isVolatile());
     }

     Value *Src;
@@ -3531,8 +3557,33 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
       return true;
     }

-    if (II.isLaunderOrStripInvariantGroup())
+    if (II.isLaunderOrStripInvariantGroup()) {
+      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
+      Value *New = nullptr;
+      if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
+        New = IRB.CreateLaunderInvariantGroup(AdjustedPtr);
+      else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
+        New = IRB.CreateStripInvariantGroup(AdjustedPtr);
+
+      if (&OldAI == &NewAI) {
+        New->takeName(&II);
+        II.replaceAllUsesWith(New);
+      } else {
+        // If the alloca can be split further, memory intrinsics using the
+        // invariant group may also need to be rewritten. Record the invariant
+        // for when the memory intrinsic is later visited.
+        for (Use &U : llvm::make_early_inc_range(II.uses())) {
+          if (isa<MemIntrinsic>(U.getUser())) {
+            if (!InvariantIntr)
+              InvariantIntr = New;
+            continue;
+          }
+          U.set(New);
+        }
+      }
+      LLVM_DEBUG(dbgs() << "          to: " << *New << "\n");
       return true;
+    }

     assert(II.getArgOperand(1) == OldPtr);
     // Lifetime intrinsics are only promotable if they cover the whole alloca.
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 656bb1ebd1161..0727eb1defc56 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -81,7 +81,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
         return false;
     } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
       if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
-          II->getIntrinsicID() != Intrinsic::fake_use)
+          II->getIntrinsicID() != Intrinsic::fake_use &&
+          !II->isLaunderOrStripInvariantGroup())
         return false;
     } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
       if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
@@ -491,9 +492,9 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
     }

     if (!I->getType()->isVoidTy()) {
-      // The only users of this bitcast/GEP instruction are lifetime intrinsics.
-      // Follow the use/def chain to erase them now instead of leaving it for
-      // dead code elimination later.
+      // The only users of this bitcast/GEP instruction are lifetime,
+      // fake_use, and invariant group intrinsics. Follow the use/def chain to
+      // erase them now instead of leaving it for dead code elimination later.
       for (Use &UU : llvm::make_early_inc_range(I->uses())) {
         Instruction *Inst = cast<Instruction>(UU.getUser());

diff --git a/llvm/test/Transforms/SROA/invariant-group.ll b/llvm/test/Transforms/SROA/invariant-group.ll
index 1be6f6e2fc32b..2b4e588ba0358 100644
--- a/llvm/test/Transforms/SROA/invariant-group.ll
+++ b/llvm/test/Transforms/SROA/invariant-group.ll
@@ -142,6 +142,7 @@ define void @partial_promotion_of_alloca() {
 ; CHECK-LABEL: @partial_promotion_of_alloca(
 ; CHECK-NEXT:    [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR_SROA_2]])
 ; CHECK-NEXT:    [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4
 ; CHECK-NEXT:    ret void
 ;
@@ -155,6 +156,40 @@ define void @partial_promotion_of_alloca() {
   ret void
 }

+define void @memcpy_after_laundering_alloca(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca(
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8
+; CHECK-NEXT:    [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca { i64, i64 }, align 8
+  %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+  call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
+  ret void
+}
+
+define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
+; CHECK-LABEL: @memcpy_after_laundering_alloca_slices(
+; CHECK-NEXT:    [[ALLOCA_SROA_0:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[ALLOCA_SROA_3:%.*]] = alloca [16 x i8], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_3]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr align 1 [[PTR:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    [[ALLOCA_SROA_2_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
+; CHECK-NEXT:    [[ALLOCA_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[ALLOCA_SROA_2_0_PTR_SROA_IDX]], align 1
+; CHECK-NEXT:    [[ALLOCA_SROA_3_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 24
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 1 [[ALLOCA_SROA_3_0_PTR_SROA_IDX]], i64 16, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
+  %launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
+  %gep = getelementptr i8, ptr %launder, i64 16
+  store i64 0, ptr %gep
+  call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
+  ret void
+}
+
 declare void @use(ptr)

 !0 = !{}