Skip to content

Commit a947419

Browse files
authored
[SROA] Improve handling of lifetimes in load-only promotion (llvm#135382)
The propagateStoredValuesToLoads() transform currently bails out when a lifetime intrinsic spans the whole alloca while the individual loads/stores operate on only a smaller part of it, because the slice size then does not match the partition size. Fix this by ignoring assume-like slices early, regardless of which range they cover. I've also restructured the code a bit, because I was getting confused by the different iterators.
1 parent c9eebc7 commit a947419

File tree

3 files changed

+99
-46
lines changed

3 files changed

+99
-46
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 55 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -5498,45 +5498,14 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
54985498
// that do not overlap with any before them. The slices are sorted by
54995499
// increasing beginOffset. We don't use AS.partitions(), as it will use a more
55005500
// sophisticated algorithm that takes splittable slices into account.
5501-
auto PartitionBegin = AS.begin();
5502-
auto PartitionEnd = PartitionBegin;
5503-
uint64_t BeginOffset = PartitionBegin->beginOffset();
5504-
uint64_t EndOffset = PartitionBegin->endOffset();
5505-
while (PartitionBegin != AS.end()) {
5506-
bool AllSameAndValid = true;
5507-
SmallVector<Instruction *> Insts;
5508-
Type *PartitionType = nullptr;
5509-
while (PartitionEnd != AS.end() &&
5510-
(PartitionEnd->beginOffset() < EndOffset ||
5511-
PartitionEnd->endOffset() <= EndOffset)) {
5512-
if (AllSameAndValid) {
5513-
AllSameAndValid &= PartitionEnd->beginOffset() == BeginOffset &&
5514-
PartitionEnd->endOffset() == EndOffset;
5515-
Instruction *User =
5516-
cast<Instruction>(PartitionEnd->getUse()->getUser());
5517-
if (auto *LI = dyn_cast<LoadInst>(User)) {
5518-
Type *UserTy = LI->getType();
5519-
// LoadAndStorePromoter requires all the types to be the same.
5520-
if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5521-
AllSameAndValid = false;
5522-
PartitionType = UserTy;
5523-
Insts.push_back(User);
5524-
} else if (auto *SI = dyn_cast<StoreInst>(User)) {
5525-
Type *UserTy = SI->getValueOperand()->getType();
5526-
if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5527-
AllSameAndValid = false;
5528-
PartitionType = UserTy;
5529-
Insts.push_back(User);
5530-
} else if (!isAssumeLikeIntrinsic(User)) {
5531-
AllSameAndValid = false;
5532-
}
5533-
}
5534-
EndOffset = std::max(EndOffset, PartitionEnd->endOffset());
5535-
++PartitionEnd;
5536-
}
5501+
LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5502+
bool AllSameAndValid = true;
5503+
Type *PartitionType = nullptr;
5504+
SmallVector<Instruction *> Insts;
5505+
uint64_t BeginOffset = 0;
5506+
uint64_t EndOffset = 0;
55375507

5538-
// So long as all the slices start and end offsets matched, update loads to
5539-
// the values stored in the partition.
5508+
auto Flush = [&]() {
55405509
if (AllSameAndValid && !Insts.empty()) {
55415510
LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
55425511
<< EndOffset << ")\n");
@@ -5546,14 +5515,56 @@ bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
55465515
BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
55475516
Promoter.run(Insts);
55485517
}
5518+
AllSameAndValid = true;
5519+
PartitionType = nullptr;
5520+
Insts.clear();
5521+
};
55495522

5550-
// Step on to the next partition.
5551-
PartitionBegin = PartitionEnd;
5552-
if (PartitionBegin == AS.end())
5553-
break;
5554-
BeginOffset = PartitionBegin->beginOffset();
5555-
EndOffset = PartitionBegin->endOffset();
5523+
for (Slice &S : AS) {
5524+
auto *User = cast<Instruction>(S.getUse()->getUser());
5525+
if (isAssumeLikeIntrinsic(User)) {
5526+
LLVM_DEBUG({
5527+
dbgs() << "Ignoring slice: ";
5528+
AS.print(dbgs(), &S);
5529+
});
5530+
continue;
5531+
}
5532+
if (S.beginOffset() >= EndOffset) {
5533+
Flush();
5534+
BeginOffset = S.beginOffset();
5535+
EndOffset = S.endOffset();
5536+
} else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5537+
if (AllSameAndValid) {
5538+
LLVM_DEBUG({
5539+
dbgs() << "Slice does not match range [" << BeginOffset << ", "
5540+
<< EndOffset << ")";
5541+
AS.print(dbgs(), &S);
5542+
});
5543+
AllSameAndValid = false;
5544+
}
5545+
EndOffset = std::max(EndOffset, S.endOffset());
5546+
continue;
5547+
}
5548+
5549+
if (auto *LI = dyn_cast<LoadInst>(User)) {
5550+
Type *UserTy = LI->getType();
5551+
// LoadAndStorePromoter requires all the types to be the same.
5552+
if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5553+
AllSameAndValid = false;
5554+
PartitionType = UserTy;
5555+
Insts.push_back(User);
5556+
} else if (auto *SI = dyn_cast<StoreInst>(User)) {
5557+
Type *UserTy = SI->getValueOperand()->getType();
5558+
if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5559+
AllSameAndValid = false;
5560+
PartitionType = UserTy;
5561+
Insts.push_back(User);
5562+
} else {
5563+
AllSameAndValid = false;
5564+
}
55565565
}
5566+
5567+
Flush();
55575568
return true;
55585569
}
55595570

llvm/test/Transforms/SROA/non-capturing-call-readonly.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -803,8 +803,7 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() {
803803
; CHECK: if.end:
804804
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 1
805805
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]])
806-
; CHECK-NEXT: [[LD:%.*]] = load i64, ptr [[C]], align 4
807-
; CHECK-NEXT: ret i64 [[LD]]
806+
; CHECK-NEXT: ret i64 0
808807
;
809808
entry:
810809
%c = alloca i64, align 2

llvm/test/Transforms/SROA/readonlynocapture.ll

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,4 +456,47 @@ define i32 @provenance_only_capture() {
456456
ret i32 %l1
457457
}
458458

459+
define i32 @simple_with_lifetimes() {
460+
; CHECK-LABEL: @simple_with_lifetimes(
461+
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
462+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[A]])
463+
; CHECK-NEXT: store i32 0, ptr [[A]], align 4
464+
; CHECK-NEXT: call void @callee(ptr [[A]])
465+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[A]])
466+
; CHECK-NEXT: ret i32 0
467+
;
468+
%a = alloca i32
469+
call void @llvm.lifetime.start(i64 4, ptr %a)
470+
store i32 0, ptr %a
471+
call void @callee(ptr %a)
472+
%l1 = load i32, ptr %a
473+
call void @llvm.lifetime.end(i64 4, ptr %a)
474+
ret i32 %l1
475+
}
476+
477+
define i32 @twoalloc_with_lifetimes() {
478+
; CHECK-LABEL: @twoalloc_with_lifetimes(
479+
; CHECK-NEXT: [[A:%.*]] = alloca { i32, i32 }, align 8
480+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr [[A]])
481+
; CHECK-NEXT: store i32 0, ptr [[A]], align 4
482+
; CHECK-NEXT: [[B:%.*]] = getelementptr i32, ptr [[A]], i32 1
483+
; CHECK-NEXT: store i32 1, ptr [[B]], align 4
484+
; CHECK-NEXT: call void @callee(ptr [[A]])
485+
; CHECK-NEXT: [[R:%.*]] = add i32 0, 1
486+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr [[A]])
487+
; CHECK-NEXT: ret i32 [[R]]
488+
;
489+
%a = alloca {i32, i32}
490+
call void @llvm.lifetime.start(i64 8, ptr %a)
491+
store i32 0, ptr %a
492+
%b = getelementptr i32, ptr %a, i32 1
493+
store i32 1, ptr %b
494+
call void @callee(ptr %a)
495+
%l1 = load i32, ptr %a
496+
%l2 = load i32, ptr %b
497+
%r = add i32 %l1, %l2
498+
call void @llvm.lifetime.end(i64 8, ptr %a)
499+
ret i32 %r
500+
}
501+
459502
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)

0 commit comments

Comments
 (0)