Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 70 additions & 19 deletions llvm/lib/Transforms/Scalar/SROA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2656,6 +2656,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
SmallSetVector<PHINode *, 8> &PHIUsers;
SmallSetVector<SelectInst *, 8> &SelectUsers;

// Track invariant intrinsic for rewriting its memory intrinsics users.
std::optional<Value *> InvariantIntr;

// Utility IR builder, whose name prefix is setup for each visited use, and
// the insertion point is set to point to the user.
IRBuilderTy IRB;
Expand Down Expand Up @@ -2789,6 +2792,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
);
}

// Pointer to use when rewriting a use of the old alloca: normally the
// sliced pointer into the new alloca, but if the use reached the alloca
// through a launder/strip invariant.group intrinsic, return that
// (possibly already rewritten) intrinsic so the barrier is preserved.
Value *getInvariantGroupPtrOrNewAllocaSlicePtr(IRBuilderTy &IRB,
                                               Instruction *I) {
  if (auto *Intr = dyn_cast<IntrinsicInst>(I))
    if (Intr->isLaunderOrStripInvariantGroup())
      // Prefer the replacement recorded when the intrinsic was rewritten;
      // fall back to I itself when it was rewritten in a previous
      // iteration or the alloca was not split further.
      return InvariantIntr.value_or(I);
  return getNewAllocaSlicePtr(IRB, I->getType());
}

// Counterpart of getPtrToNewAI: when the use reached the alloca through a
// launder/strip invariant.group intrinsic, return that (possibly already
// rewritten) intrinsic instead of a fresh pointer to the new alloca.
// NOTE(review): AddrSpace/IsVolatile are ignored on the invariant-group
// path — presumably the intrinsic result already has the right address
// space; confirm against callers.
Value *getInvariantGroupPtrOrPtrToNewAI(Instruction *I, unsigned AddrSpace,
                                        bool IsVolatile) {
  if (auto *Intr = dyn_cast<IntrinsicInst>(I))
    if (Intr->isLaunderOrStripInvariantGroup())
      return InvariantIntr.value_or(I);
  return getPtrToNewAI(AddrSpace, IsVolatile);
}

/// Compute suitable alignment to access this slice of the *new*
/// alloca.
///
Expand Down Expand Up @@ -3146,7 +3171,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
if (!isa<ConstantInt>(II.getLength())) {
assert(!IsSplit);
assert(NewBeginOffset == BeginOffset);
II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
II.setDest(getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr));
II.setDestAlignment(getSliceAlign());
// In theory we should call migrateDebugInfo here. However, we do not
// emit dbg.assign intrinsics for mem intrinsics storing through non-
Expand Down Expand Up @@ -3187,8 +3212,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
unsigned Sz = NewEndOffset - NewBeginOffset;
Constant *Size = ConstantInt::get(SizeTy, Sz);
MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
MaybeAlign(getSliceAlign()), II.isVolatile()));
getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr), II.getValue(),
Size, MaybeAlign(getSliceAlign()), II.isVolatile()));
if (AATags)
New->setAAMetadata(
AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
Expand Down Expand Up @@ -3261,7 +3286,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
V = convertValue(DL, IRB, V, AllocaTy);
}

Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
Value *NewPtr = getInvariantGroupPtrOrPtrToNewAI(
OldPtr, II.getDestAddressSpace(), II.isVolatile());
StoreInst *New =
IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
Expand Down Expand Up @@ -3372,28 +3398,28 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
OtherAlign =
commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

if (EmitMemCpy) {
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
// Compute the other pointer, folding as much as possible to produce
// a single, simple GEP in most cases.
OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");

Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
if (EmitMemCpy) {
Value *OutPtr = getInvariantGroupPtrOrNewAllocaSlicePtr(IRB, OldPtr);
Type *SizeTy = II.getLength()->getType();
Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

Value *DestPtr, *SrcPtr;
MaybeAlign DestAlign, SrcAlign;
// Note: IsDest is true iff we're copying into the new alloca slice
if (IsDest) {
DestPtr = OurPtr;
DestPtr = OutPtr;
DestAlign = SliceAlign;
SrcPtr = OtherPtr;
SrcAlign = OtherAlign;
} else {
DestPtr = OtherPtr;
DestAlign = OtherAlign;
SrcPtr = OurPtr;
SrcPtr = OutPtr;
SrcAlign = SliceAlign;
}
CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
Expand Down Expand Up @@ -3438,8 +3464,6 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
OtherTy = NewAllocaTy;
}

Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
OtherPtr->getName() + ".");
MaybeAlign SrcAlign = OtherAlign;
MaybeAlign DstAlign = SliceAlign;
if (!IsDest)
Expand All @@ -3449,11 +3473,13 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
Value *DstPtr;

if (IsDest) {
DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
SrcPtr = AdjPtr;
DstPtr = getInvariantGroupPtrOrPtrToNewAI(
OldPtr, II.getDestAddressSpace(), II.isVolatile());
SrcPtr = OtherPtr;
} else {
DstPtr = AdjPtr;
SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
DstPtr = OtherPtr;
SrcPtr = getInvariantGroupPtrOrPtrToNewAI(
OldPtr, II.getSourceAddressSpace(), II.isVolatile());
}

Value *Src;
Expand Down Expand Up @@ -3531,8 +3557,33 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
return true;
}

if (II.isLaunderOrStripInvariantGroup())
if (II.isLaunderOrStripInvariantGroup()) {
Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
Value *New = nullptr;
if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
New = IRB.CreateLaunderInvariantGroup(AdjustedPtr);
else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
New = IRB.CreateStripInvariantGroup(AdjustedPtr);

if (&OldAI == &NewAI) {
New->takeName(&II);
II.replaceAllUsesWith(New);
} else {
// If the alloca can be split further, memory intrinsics using the
// invariant group may also need to be rewritten. Record the invariant
// for when the memory intrinsic is later visited.
for (Use &U : II.uses())
if (isa<MemIntrinsic>(U.getUser())) {
if (!InvariantIntr)
InvariantIntr = New;
continue;
} else {
U.set(New);
}
}
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return true;
}

assert(II.getArgOperand(1) == OldPtr);
// Lifetime intrinsics are only promotable if they cover the whole alloca.
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
return false;
} else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
if (!II->isLifetimeStartOrEnd() && !II->isDroppable() &&
II->getIntrinsicID() != Intrinsic::fake_use)
II->getIntrinsicID() != Intrinsic::fake_use &&
!II->isLaunderOrStripInvariantGroup())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't understand how this is supposed to work. I don't think we have logic to handle these in mem2reg? Also wouldn't we have to scan their uses here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might be wrong, but we don't seem to have specific handling for the other exempted intrinsics here either — do we actually need any dedicated logic in mem2reg? And why would we need to scan their uses? The change is needed because the alloca is marked as promotable; without it we would hit an assertion in PromoteMemoryToRegister. I don't think we want to mark the alloca as non-promotable when visiting the intrinsic, since that would prevent mem2reg from simplifying the alloca any further.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I think this gets handled here:

// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
for (Use &UU : llvm::make_early_inc_range(I->uses())) {
Instruction *Inst = cast<Instruction>(UU.getUser());
// Drop the use of I in droppable instructions.
if (Inst->isDroppable()) {
Inst->dropDroppableUse(UU);
continue;
}
Inst->eraseFromParent();
}
I think the handling is ultimately correct, but the comment there could use an update.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment updated, thanks.

return false;
} else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (!onlyUsedByLifetimeMarkersOrDroppableInsts(BCI))
Expand Down Expand Up @@ -491,9 +492,9 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
}

if (!I->getType()->isVoidTy()) {
// The only users of this bitcast/GEP instruction are lifetime intrinsics.
// Follow the use/def chain to erase them now instead of leaving it for
// dead code elimination later.
// The only users of this bitcast/GEP instruction are lifetime intrinsics,
// fake_use as well as invariant group ones. Follow the use/def chain to
// erase them now instead of leaving it for dead code elimination later.
for (Use &UU : llvm::make_early_inc_range(I->uses())) {
Instruction *Inst = cast<Instruction>(UU.getUser());

Expand Down
35 changes: 35 additions & 0 deletions llvm/test/Transforms/SROA/invariant-group.ll
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ define void @partial_promotion_of_alloca() {
; CHECK-LABEL: @partial_promotion_of_alloca(
; CHECK-NEXT: [[STRUCT_PTR_SROA_2:%.*]] = alloca i32, align 4
; CHECK-NEXT: store volatile i32 0, ptr [[STRUCT_PTR_SROA_2]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[STRUCT_PTR_SROA_2]])
; CHECK-NEXT: [[STRUCT_PTR_SROA_2_0_STRUCT_PTR_SROA_2_4_LOAD_VAL:%.*]] = load volatile i32, ptr [[STRUCT_PTR_SROA_2]], align 4
; CHECK-NEXT: ret void
;
Expand All @@ -155,6 +156,40 @@ define void @partial_promotion_of_alloca() {
ret void
}

; Whole-alloca memcpy through a laundered pointer: the alloca is not split,
; so SROA must keep the launder.invariant.group call and leave the memcpy
; writing through the laundered pointer rather than the raw alloca.
define void @memcpy_after_laundering_alloca(ptr %ptr) {
; CHECK-LABEL: @memcpy_after_laundering_alloca(
; CHECK-NEXT: [[ALLOCA:%.*]] = alloca { i64, i64 }, align 8
; CHECK-NEXT: [[LAUNDER:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[LAUNDER]], ptr [[PTR:%.*]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%alloca = alloca { i64, i64 }, align 8
%launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 16, i1 false)
ret void
}

; The laundered alloca is split into slices: each surviving slice gets its
; own launder.invariant.group, the covering memcpy is split per slice and
; rewritten to go through the corresponding laundered pointer, and the
; middle i64 slice (stored 0, then overwritten by the copy) is promoted.
define void @memcpy_after_laundering_alloca_slices(ptr %ptr) {
; CHECK-LABEL: @memcpy_after_laundering_alloca_slices(
; CHECK-NEXT: [[ALLOCA_SROA_0:%.*]] = alloca [16 x i8], align 8
; CHECK-NEXT: [[ALLOCA_SROA_3:%.*]] = alloca [16 x i8], align 8
; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_0]])
; CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.launder.invariant.group.p0(ptr [[ALLOCA_SROA_3]])
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr align 1 [[PTR:%.*]], i64 16, i1 false)
; CHECK-NEXT: [[ALLOCA_SROA_2_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
; CHECK-NEXT: [[ALLOCA_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[ALLOCA_SROA_2_0_PTR_SROA_IDX]], align 1
; CHECK-NEXT: [[ALLOCA_SROA_3_0_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 24
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 1 [[ALLOCA_SROA_3_0_PTR_SROA_IDX]], i64 16, i1 false)
; CHECK-NEXT: ret void
;
%alloca = alloca { [16 x i8], i64, [16 x i8] }, align 8
%launder = call ptr @llvm.launder.invariant.group.p0(ptr %alloca)
%gep = getelementptr i8, ptr %launder, i64 16
store i64 0, ptr %gep
call void @llvm.memcpy.p0.p0.i64(ptr %launder, ptr %ptr, i64 40, i1 false)
ret void
}

declare void @use(ptr)

!0 = !{}
Expand Down
Loading