Skip to content

Commit a004509

Browse files
[ControlHeightReduction] Drop lifetime annotations where necessary (#159686)
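ControlHeightReduction (CHR) duplicates some blocks and, for any value that is live out of a region it operates on, inserts a phi node in the region's exit block. This includes allocas. Having a lifetime annotation refer to a phi node was made illegal in 92c55a3, so the verifier fails after CHR whenever a lifetime annotation ends up referring to such a phi. There are some cases where we might not need to drop lifetime annotations (usually because we do not need the phi to begin with), but for now, to be conservative, drop all lifetime annotations on any value that gets such a phi. Static allocas in the entry block being split are instead moved into the pre-entry block, so they stay static and keep their lifetime annotations. Fixes #159621.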
1 parent 2b5e29e commit a004509

File tree

2 files changed

+215
-8
lines changed

2 files changed

+215
-8
lines changed

llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,14 +1591,31 @@ static void insertTrivialPHIs(CHRScope *Scope,
15911591
}
15921592
TrivialPHIs.insert(PN);
15931593
CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1594+
bool FoundLifetimeAnnotation = false;
15941595
for (Instruction *UI : Users) {
1596+
// If we found a lifetime annotation, remove it, but set a flag
1597+
// to ensure that we remove all other lifetime annotations attached
1598+
// to the alloca.
1599+
if (UI->isLifetimeStartOrEnd()) {
1600+
UI->eraseFromParent();
1601+
FoundLifetimeAnnotation = true;
1602+
continue;
1603+
}
15951604
for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
15961605
if (UI->getOperand(J) == &I) {
15971606
UI->setOperand(J, PN);
15981607
}
15991608
}
16001609
CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
16011610
}
1611+
// Erase any leftover lifetime annotations for a dynamic alloca.
1612+
if (FoundLifetimeAnnotation) {
1613+
for (User *U : make_early_inc_range(I.users())) {
1614+
if (auto *UI = dyn_cast<Instruction>(U))
1615+
if (UI->isLifetimeStartOrEnd())
1616+
UI->eraseFromParent();
1617+
}
1618+
}
16021619
}
16031620
}
16041621
}
@@ -1693,14 +1710,12 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
16931710
BasicBlock *ExitBlock = LastRegion->getExit();
16941711
std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
16951712

1696-
if (ExitBlock) {
1697-
// Insert a trivial phi at the exit block (where the CHR hot path and the
1698-
// cold path merges) for a value that's defined in the scope but used
1699-
// outside it (meaning it's alive at the exit block). We will add the
1700-
// incoming values for the CHR cold paths to it below. Without this, we'd
1701-
// miss updating phi's for such values unless there happens to already be a
1702-
// phi for that value there.
1703-
insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1713+
SmallVector<AllocaInst *> StaticAllocas;
1714+
for (Instruction &I : *EntryBlock) {
1715+
if (auto *AI = dyn_cast<AllocaInst>(&I)) {
1716+
if (AI->isStaticAlloca())
1717+
StaticAllocas.push_back(AI);
1718+
}
17041719
}
17051720

17061721
// Split the entry block of the first region. The new block becomes the new
@@ -1719,6 +1734,20 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
17191734
FirstRegion->replaceEntryRecursive(NewEntryBlock);
17201735
BasicBlock *PreEntryBlock = EntryBlock;
17211736

1737+
// Move static allocas into the pre-entry block so they stay static.
1738+
for (AllocaInst *AI : StaticAllocas)
1739+
AI->moveBefore(EntryBlock->getTerminator()->getIterator());
1740+
1741+
if (ExitBlock) {
1742+
// Insert a trivial phi at the exit block (where the CHR hot path and the
1743+
// cold path merges) for a value that's defined in the scope but used
1744+
// outside it (meaning it's alive at the exit block). We will add the
1745+
// incoming values for the CHR cold paths to it below. Without this, we'd
1746+
// miss updating phi's for such values unless there happens to already be a
1747+
// phi for that value there.
1748+
insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1749+
}
1750+
17221751
ValueToValueMapTy VMap;
17231752
// Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
17241753
// hot path (originals) and a cold path (clones) and update the PHIs at the
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes='require<profile-summary>,chr' -S | FileCheck %s
3+
4+
declare void @foo()
5+
declare void @bar()
6+
declare void @baz(i64)
7+
8+
; Test that when we have a static alloca in an entry block that will get split,
9+
; the alloca remains static and we preserve its lifetime annotations.
10+
define void @test_chr_with_lifetimes(ptr %i) !prof !14 {
11+
; CHECK-LABEL: @test_chr_with_lifetimes(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
14+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
15+
; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
16+
; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]]
17+
; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false
18+
; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]]
19+
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false
20+
; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15:![0-9]+]]
21+
; CHECK: entry.split:
22+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16:![0-9]+]]
23+
; CHECK-NEXT: call void @baz(i64 [[TMP6]])
24+
; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17:![0-9]+]]
25+
; CHECK: bb0:
26+
; CHECK-NEXT: call void @foo()
27+
; CHECK-NEXT: br label [[BB1]]
28+
; CHECK: entry.split.nonchr:
29+
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
30+
; CHECK-NEXT: call void @baz(i64 [[TMP7]])
31+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
32+
; CHECK: bb0.nonchr:
33+
; CHECK-NEXT: call void @foo()
34+
; CHECK-NEXT: br label [[BB1]]
35+
; CHECK: bb1:
36+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]])
37+
; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8
38+
; CHECK-NEXT: br label [[BB2:%.*]]
39+
; CHECK: bb2:
40+
; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[TMP3:%.*]], [[BB2]] ], [ null, [[BB1]] ]
41+
; CHECK-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 24
42+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP2]], [[I]]
43+
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2]]
44+
; CHECK: bb3:
45+
; CHECK-NEXT: ret void
46+
;
47+
entry:
48+
%1 = load i32, ptr %i
49+
%2 = icmp eq i32 %1, 0
50+
%3 = select i1 %2, i64 4, i64 0, !prof !15
51+
%test = alloca i32, align 8
52+
call void @baz(i64 %3)
53+
br i1 %2, label %bb1, label %bb0, !prof !15
54+
55+
bb0:
56+
call void @foo()
57+
br label %bb1
58+
59+
bb1:
60+
call void @llvm.lifetime.start.p0(ptr %test)
61+
store ptr %test, ptr %i, align 8
62+
br label %bb2
63+
64+
bb2:
65+
%4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
66+
%5 = getelementptr i8, ptr %4, i64 24
67+
%6 = icmp eq ptr %4, %i
68+
br i1 %6, label %bb3, label %bb2
69+
70+
bb3:
71+
ret void
72+
}
73+
74+
; Test that we remove lifetime markers that would otherwise refer to phi
75+
; nodes, because the dynamic allocas they referred to have been duplicated.
76+
define void @test_chr_dynamic_alloca(ptr %i) !prof !14 {
77+
; CHECK-LABEL: @test_chr_dynamic_alloca(
78+
; CHECK-NEXT: entry:
79+
; CHECK-NEXT: [[TEST1:%.*]] = load i32, ptr [[I:%.*]], align 4
80+
; CHECK-NEXT: [[TEST2:%.*]] = icmp eq i32 [[TEST1]], 5
81+
; CHECK-NEXT: br i1 [[TEST2]], label [[BB4:%.*]], label [[BB3:%.*]]
82+
; CHECK: bb4:
83+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
84+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
85+
; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]]
86+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false
87+
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]]
88+
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
89+
; CHECK-NEXT: br i1 [[TMP5]], label [[BB4_SPLIT:%.*]], label [[BB4_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
90+
; CHECK: bb4.split:
91+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]]
92+
; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
93+
; CHECK-NEXT: call void @baz(i64 [[TMP6]])
94+
; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]]
95+
; CHECK: bb0:
96+
; CHECK-NEXT: call void @foo()
97+
; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8
98+
; CHECK-NEXT: br label [[BB1]]
99+
; CHECK: bb4.split.nonchr:
100+
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
101+
; CHECK-NEXT: [[TEST_NONCHR:%.*]] = alloca i32, align 8
102+
; CHECK-NEXT: call void @baz(i64 [[TMP7]])
103+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
104+
; CHECK: bb0.nonchr:
105+
; CHECK-NEXT: call void @foo()
106+
; CHECK-NEXT: store ptr [[TEST_NONCHR]], ptr [[I]], align 8
107+
; CHECK-NEXT: br label [[BB1]]
108+
; CHECK: bb1:
109+
; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TEST]], [[BB0]] ], [ [[TEST]], [[BB4_SPLIT]] ], [ [[TEST_NONCHR]], [[BB0_NONCHR]] ], [ [[TEST_NONCHR]], [[BB4_SPLIT_NONCHR]] ]
110+
; CHECK-NEXT: call void @bar()
111+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[I]], align 8
112+
; CHECK-NEXT: br label [[BB2:%.*]]
113+
; CHECK: bb2:
114+
; CHECK-NEXT: [[TMP9:%.*]] = phi ptr [ [[TMP10:%.*]], [[BB2]] ], [ null, [[BB1]] ]
115+
; CHECK-NEXT: [[TMP10]] = getelementptr i8, ptr [[TMP9]], i64 24
116+
; CHECK-NEXT: [[TEST5:%.*]] = load ptr, ptr [[TMP8]], align 8
117+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq ptr [[TMP9]], [[TEST5]]
118+
; CHECK-NEXT: br i1 [[TMP11]], label [[BB3]], label [[BB2]]
119+
; CHECK: bb3:
120+
; CHECK-NEXT: ret void
121+
;
122+
entry:
123+
%test1 = load i32, ptr %i
124+
%test2 = icmp eq i32 %test1, 5
125+
br i1 %test2, label %bb4, label %bb3
126+
127+
bb4:
128+
%1 = load i32, ptr %i
129+
%2 = icmp eq i32 %1, 0
130+
%3 = select i1 %2, i64 4, i64 0, !prof !15
131+
%test = alloca i32, align 8
132+
call void @baz(i64 %3)
133+
br i1 %2, label %bb1, label %bb0, !prof !15
134+
135+
bb0:
136+
call void @foo()
137+
call void @llvm.lifetime.start.p0(ptr %test)
138+
store ptr %test, ptr %i, align 8
139+
br label %bb1
140+
141+
bb1:
142+
call void @bar()
143+
call void @llvm.lifetime.start.p0(ptr %test)
144+
store ptr %test, ptr %i, align 8
145+
br label %bb2
146+
147+
bb2:
148+
%4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
149+
%5 = getelementptr i8, ptr %4, i64 24
150+
%test5 = load ptr, ptr %test
151+
call void @llvm.lifetime.end.p0(ptr %test)
152+
%6 = icmp eq ptr %4, %test5
153+
br i1 %6, label %bb3, label %bb2
154+
155+
bb3:
156+
ret void
157+
}
158+
159+
160+
!llvm.module.flags = !{!0}
161+
!0 = !{i32 1, !"ProfileSummary", !1}
162+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
163+
!2 = !{!"ProfileFormat", !"InstrProf"}
164+
!3 = !{!"TotalCount", i64 10000}
165+
!4 = !{!"MaxCount", i64 10}
166+
!5 = !{!"MaxInternalCount", i64 1}
167+
!6 = !{!"MaxFunctionCount", i64 1000}
168+
!7 = !{!"NumCounts", i64 3}
169+
!8 = !{!"NumFunctions", i64 3}
170+
!9 = !{!"DetailedSummary", !10}
171+
!10 = !{!11, !12, !13}
172+
!11 = !{i32 10000, i64 100, i32 1}
173+
!12 = !{i32 999000, i64 100, i32 1}
174+
!13 = !{i32 999999, i64 1, i32 2}
175+
176+
!14 = !{!"function_entry_count", i64 100}
177+
!15 = !{!"branch_weights", i32 0, i32 1}
178+
; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}

0 commit comments

Comments
 (0)