Skip to content

Commit d3825e4

Browse files
committed
[SROA] Improve handling of lifetimes in load-only promotion
The propagateStoredValuesToLoads() transform currently bails out when a lifetime intrinsic spans the whole alloca while the individual loads/stores operate on only a smaller part of it, because the lifetime slice's range does not match the partition's range. Fix this by ignoring assume-like slices early, regardless of which range they cover. I've also restructured the code somewhat, because I was getting confused by the different iterators.
1 parent a170eb5 commit d3825e4

File tree

3 files changed

+54
-49
lines changed

3 files changed

+54
-49
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 52 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5498,45 +5498,14 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
54985498
// that do not overlap with any before them. The slices are sorted by
54995499
// increasing beginOffset. We don't use AS.partitions(), as it will use a more
55005500
// sophisticated algorithm that takes splittable slices into account.
5501-
auto PartitionBegin = AS.begin();
5502-
auto PartitionEnd = PartitionBegin;
5503-
uint64_t BeginOffset = PartitionBegin->beginOffset();
5504-
uint64_t EndOffset = PartitionBegin->endOffset();
5505-
while (PartitionBegin != AS.end()) {
5506-
bool AllSameAndValid = true;
5507-
SmallVector<Instruction *> Insts;
5508-
Type *PartitionType = nullptr;
5509-
while (PartitionEnd != AS.end() &&
5510-
(PartitionEnd->beginOffset() < EndOffset ||
5511-
PartitionEnd->endOffset() <= EndOffset)) {
5512-
if (AllSameAndValid) {
5513-
AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
5514-
PartitionEnd->endOffset() == EndOffset;
5515-
Instruction *User =
5516-
cast<Instruction>(PartitionEnd->getUse()->getUser());
5517-
if (auto *LI = dyn_cast<LoadInst>(User)) {
5518-
Type *UserTy = LI->getType();
5519-
// LoadAndStorePromoter requires all the types to be the same.
5520-
if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5521-
AllSameAndValid = false;
5522-
PartitionType = UserTy;
5523-
Insts.push_back(User);
5524-
} else if (auto *SI = dyn_cast<StoreInst>(User)) {
5525-
Type *UserTy = SI->getValueOperand()->getType();
5526-
if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5527-
AllSameAndValid = false;
5528-
PartitionType = UserTy;
5529-
Insts.push_back(User);
5530-
} else if (!isAssumeLikeIntrinsic(User)) {
5531-
AllSameAndValid = false;
5532-
}
5533-
}
5534-
EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
5535-
++PartitionEnd;
5536-
}
5501+
LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5502+
bool AllSameAndValid = true;
5503+
Type *PartitionType = nullptr;
5504+
SmallVector<Instruction *> Insts;
5505+
uint64_t BeginOffset = 0;
5506+
uint64_t EndOffset = 0;
55375507

5538-
// So long as all the slices start and end offsets matched, update loads to
5539-
// the values stored in the partition.
5508+
auto Flush = [&]() {
55405509
if (AllSameAndValid && !Insts.empty()) {
55415510
LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
55425511
<< EndOffset << ")\n");
@@ -5546,14 +5515,53 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
55465515
BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
55475516
Promoter.run(Insts);
55485517
}
5518+
AllSameAndValid = true;
5519+
PartitionType = nullptr;
5520+
Insts.clear();
5521+
};
55495522

5550-
// Step on to the next partition.
5551-
PartitionBegin = PartitionEnd;
5552-
if (PartitionBegin == AS.end())
5553-
break;
5554-
BeginOffset = PartitionBegin->beginOffset();
5555-
EndOffset = PartitionBegin->endOffset();
5523+
for (Slice &S : AS) {
5524+
auto *User = cast<Instruction>(S.getUse()->getUser());
5525+
if (isAssumeLikeIntrinsic(User)) {
5526+
LLVM_DEBUG({
5527+
dbgs() << "Ignoring slice: ";
5528+
AS.print(dbgs(), &S);
5529+
});
5530+
continue;
5531+
}
5532+
if (S.beginOffset() >= EndOffset) {
5533+
Flush();
5534+
BeginOffset = S.beginOffset();
5535+
EndOffset = S.endOffset();
5536+
} else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5537+
LLVM_DEBUG({
5538+
dbgs() << "Slice does not match range [" << BeginOffset << ", "
5539+
<< EndOffset << ")";
5540+
AS.print(dbgs(), &S);
5541+
});
5542+
AllSameAndValid = false;
5543+
EndOffset = std::max(EndOffset, S.endOffset());
5544+
}
5545+
5546+
if (auto *LI = dyn_cast<LoadInst>(User)) {
5547+
Type *UserTy = LI->getType();
5548+
// LoadAndStorePromoter requires all the types to be the same.
5549+
if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5550+
AllSameAndValid = false;
5551+
PartitionType = UserTy;
5552+
Insts.push_back(User);
5553+
} else if (auto *SI = dyn_cast<StoreInst>(User)) {
5554+
Type *UserTy = SI->getValueOperand()->getType();
5555+
if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5556+
AllSameAndValid = false;
5557+
PartitionType = UserTy;
5558+
Insts.push_back(User);
5559+
} else {
5560+
AllSameAndValid = false;
5561+
}
55565562
}
5563+
5564+
Flush();
55575565
return true;
55585566
}
55595567

llvm/test/Transforms/SROA/non-capturing-call-readonly.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,8 +803,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
803803
; CHECK: if.end:
804804
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
805805
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
806-
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[C]], align 4
807-
; CHECK-NEXT: ret i64 [[LD]]
806+
; CHECK-NEXT: ret i64 0
808807
;
809808
entry:
810809
%c = alloca i64, align 2

llvm/test/Transforms/SROA/readonlynocapture.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -482,9 +482,7 @@ define i32 @twoalloc_with_lifetimes() {
482482
; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
483483
; CHECK-NEXT: store i32 1, ptr [[B]], align 4
484484
; CHECK-NEXT: call void @callee(ptr [[A]])
485-
; CHECK-NEXT: [[L1:%.*]] = load i32, ptr [[A]], align 4
486-
; CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[B]], align 4
487-
; CHECK-NEXT: [[R:%.*]] = add i32 [[L1]], [[L2]]
485+
; CHECK-NEXT: [[R:%.*]] = add i32 0, 1
488486
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
489487
; CHECK-NEXT: ret i32 [[R]]
490488
;

0 commit comments

Comments
 (0)