Skip to content

Commit 27fe33e

Browse files
feedback part1
1 parent 9062f71 commit 27fe33e

File tree

2 files changed

+211
-10
lines changed

2 files changed

+211
-10
lines changed

llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp

Lines changed: 33 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1591,11 +1591,14 @@ static void insertTrivialPHIs(CHRScope *Scope,
15911591
}
15921592
TrivialPHIs.insert(PN);
15931593
CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1594+
bool FoundLifetimeAnnotation = false;
15941595
for (Instruction *UI : Users) {
1595-
// Drop lifetime annotations as it is illegal for them to refer to a
1596-
// phi node.
1596+
// Lifetime annotations may not refer to a phi node, so erase this one
1597+
// and set a flag to ensure that we also remove all other lifetime
1598+
// annotations attached to the alloca.
15971599
if (UI->isLifetimeStartOrEnd()) {
15981600
UI->eraseFromParent();
1601+
FoundLifetimeAnnotation = true;
15991602
continue;
16001603
}
16011604
for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
@@ -1605,6 +1608,14 @@ static void insertTrivialPHIs(CHRScope *Scope,
16051608
}
16061609
CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
16071610
}
1611+
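// Erase any leftover lifetime annotations for a dynamic alloca.
// NOTE(review): erasing a user while iterating I.users() invalidates the
// iterator; iterate over make_early_inc_range(I.users()) instead.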
1612+
if (FoundLifetimeAnnotation) {
1613+
for (User *U : I.users()) {
1614+
if (auto *UI = dyn_cast<Instruction>(U))
1615+
if(UI->isLifetimeStartOrEnd())
1616+
UI->eraseFromParent();
1617+
}
1618+
}
16081619
}
16091620
}
16101621
}
@@ -1699,14 +1710,12 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
16991710
BasicBlock *ExitBlock = LastRegion->getExit();
17001711
std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
17011712

1702-
if (ExitBlock) {
1703-
// Insert a trivial phi at the exit block (where the CHR hot path and the
1704-
// cold path merges) for a value that's defined in the scope but used
1705-
// outside it (meaning it's alive at the exit block). We will add the
1706-
// incoming values for the CHR cold paths to it below. Without this, we'd
1707-
// miss updating phi's for such values unless there happens to already be a
1708-
// phi for that value there.
1709-
insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1713+
SmallVector<AllocaInst*> StaticAllocas;
1714+
for (Instruction &I : *EntryBlock) {
1715+
if (auto *AI = dyn_cast<AllocaInst>(&I)) {
1716+
if (AI->isStaticAlloca())
1717+
StaticAllocas.push_back(AI);
1718+
}
17101719
}
17111720

17121721
// Split the entry block of the first region. The new block becomes the new
@@ -1725,6 +1734,20 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
17251734
FirstRegion->replaceEntryRecursive(NewEntryBlock);
17261735
BasicBlock *PreEntryBlock = EntryBlock;
17271736

1737+
// Move static allocas into the pre-entry block so they stay static.
1738+
for (AllocaInst *AI : StaticAllocas)
1739+
AI->moveBefore(EntryBlock->getTerminator()->getIterator());
1740+
1741+
if (ExitBlock) {
1742+
// Insert a trivial phi at the exit block (where the CHR hot path and the
1743+
// cold path merge) for a value that's defined in the scope but used
1744+
// outside it (meaning it's alive at the exit block). We will add the
1745+
// incoming values for the CHR cold paths to it below. Without this, we'd
1746+
// miss updating phis for such values unless there happens to already be a
1747+
// phi for that value there.
1748+
insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1749+
}
1750+
17281751
ValueToValueMapTy VMap;
17291752
// Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
17301753
// hot path (originals) and a cold path (clones) and update the PHIs at the
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes='require<profile-summary>,chr' -S | FileCheck %s
3+
4+
declare void @foo()
5+
declare void @bar()
6+
declare void @baz(i64)
7+
8+
; Test that when we have a static alloca in an entry block that will get split,
9+
; the alloca remains static and we preserve its lifetime annotations.
10+
define void @test_chr_with_lifetimes(ptr %i) !prof !14 {
11+
; CHECK-LABEL: @test_chr_with_lifetimes(
12+
; CHECK-NEXT: entry:
13+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
14+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
15+
; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
16+
; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]]
17+
; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false
18+
; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]]
19+
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false
20+
; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15:![0-9]+]]
21+
; CHECK: entry.split:
22+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16:![0-9]+]]
23+
; CHECK-NEXT: call void @baz(i64 [[TMP6]])
24+
; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17:![0-9]+]]
25+
; CHECK: bb0:
26+
; CHECK-NEXT: call void @foo()
27+
; CHECK-NEXT: br label [[BB1]]
28+
; CHECK: entry.split.nonchr:
29+
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
30+
; CHECK-NEXT: call void @baz(i64 [[TMP7]])
31+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
32+
; CHECK: bb0.nonchr:
33+
; CHECK-NEXT: call void @foo()
34+
; CHECK-NEXT: br label [[BB1]]
35+
; CHECK: bb1:
36+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]])
37+
; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8
38+
; CHECK-NEXT: br label [[BB2:%.*]]
39+
; CHECK: bb2:
40+
; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[TMP3:%.*]], [[BB2]] ], [ null, [[BB1]] ]
41+
; CHECK-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 24
42+
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP2]], [[I]]
43+
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2]]
44+
; CHECK: bb3:
45+
; CHECK-NEXT: ret void
46+
;
47+
entry:
48+
%1 = load i32, ptr %i
49+
%2 = icmp eq i32 %1, 0
50+
%3 = select i1 %2, i64 4, i64 0, !prof !15
51+
%test = alloca i32, align 8
52+
call void @baz(i64 %3)
53+
br i1 %2, label %bb1, label %bb0, !prof !15
54+
55+
bb0:
56+
call void @foo()
57+
br label %bb1
58+
59+
bb1:
60+
call void @llvm.lifetime.start.p0(ptr %test)
61+
store ptr %test, ptr %i, align 8
62+
br label %bb2
63+
64+
bb2:
65+
%4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
66+
%5 = getelementptr i8, ptr %4, i64 24
67+
%6 = icmp eq ptr %4, %i
68+
br i1 %6, label %bb3, label %bb2
69+
70+
bb3:
71+
ret void
72+
}
73+
74+
; Test that we remove lifetime markers that would otherwise refer to phi
75+
; nodes because the dynamic allocas they referred to have been duplicated.
76+
define void @test_chr_dynamic_alloca(ptr %i) !prof !14 {
77+
; CHECK-LABEL: @test_chr_dynamic_alloca(
78+
; CHECK-NEXT: entry:
79+
; CHECK-NEXT: [[TEST1:%.*]] = load i32, ptr [[I:%.*]], align 4
80+
; CHECK-NEXT: [[TEST2:%.*]] = icmp eq i32 [[TEST1]], 5
81+
; CHECK-NEXT: br i1 [[TEST2]], label [[BB4:%.*]], label [[BB3:%.*]]
82+
; CHECK: bb4:
83+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
84+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
85+
; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]]
86+
; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false
87+
; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]]
88+
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
89+
; CHECK-NEXT: br i1 [[TMP5]], label [[BB4_SPLIT:%.*]], label [[BB4_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
90+
; CHECK: bb4.split:
91+
; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]]
92+
; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
93+
; CHECK-NEXT: call void @baz(i64 [[TMP6]])
94+
; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]]
95+
; CHECK: bb0:
96+
; CHECK-NEXT: call void @foo()
97+
; CHECK-NEXT: br label [[BB1]]
98+
; CHECK: bb4.split.nonchr:
99+
; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
100+
; CHECK-NEXT: [[TEST_NONCHR:%.*]] = alloca i32, align 8
101+
; CHECK-NEXT: call void @baz(i64 [[TMP7]])
102+
; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
103+
; CHECK: bb0.nonchr:
104+
; CHECK-NEXT: call void @foo()
105+
; CHECK-NEXT: br label [[BB1]]
106+
; CHECK: bb1:
107+
; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TEST]], [[BB0]] ], [ [[TEST]], [[BB4_SPLIT]] ], [ [[TEST_NONCHR]], [[BB0_NONCHR]] ], [ [[TEST_NONCHR]], [[BB4_SPLIT_NONCHR]] ]
108+
; CHECK-NEXT: store ptr [[TMP8]], ptr [[I]], align 8
109+
; CHECK-NEXT: br label [[BB2:%.*]]
110+
; CHECK: bb2:
111+
; CHECK-NEXT: [[TMP9:%.*]] = phi ptr [ [[TMP10:%.*]], [[BB2]] ], [ null, [[BB1]] ]
112+
; CHECK-NEXT: [[TMP10]] = getelementptr i8, ptr [[TMP9]], i64 24
113+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq ptr [[TMP9]], [[I]]
114+
; CHECK-NEXT: br i1 [[TMP11]], label [[BB3]], label [[BB2]]
115+
; CHECK: bb3:
116+
; CHECK-NEXT: ret void
117+
;
118+
entry:
119+
%test1 = load i32, ptr %i
120+
%test2 = icmp eq i32 %test1, 5
121+
br i1 %test2, label %bb4, label %bb3
122+
123+
bb4:
124+
%1 = load i32, ptr %i
125+
%2 = icmp eq i32 %1, 0
126+
%3 = select i1 %2, i64 4, i64 0, !prof !15
127+
%test = alloca i32, align 8
128+
call void @baz(i64 %3)
129+
br i1 %2, label %bb1, label %bb0, !prof !15
130+
131+
bb0:
132+
call void @foo()
133+
call void @llvm.lifetime.start.p0(ptr %test)
134+
store ptr %test, ptr %i, align 8
135+
br label %bb1
136+
137+
bb1:
138+
call void @bar()
139+
call void @llvm.lifetime.start.p0(ptr %test)
140+
store ptr %test, ptr %i, align 8
141+
br label %bb2
142+
143+
bb2:
144+
%4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
145+
%5 = getelementptr i8, ptr %4, i64 24
146+
%test5 = load ptr, ptr %test
147+
%6 = icmp eq ptr %4, %test5
148+
br i1 %6, label %bb3, label %bb2
149+
150+
bb3:
151+
ret void
152+
}
153+
154+
155+
!llvm.module.flags = !{!0}
156+
!0 = !{i32 1, !"ProfileSummary", !1}
157+
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
158+
!2 = !{!"ProfileFormat", !"InstrProf"}
159+
!3 = !{!"TotalCount", i64 10000}
160+
!4 = !{!"MaxCount", i64 10}
161+
!5 = !{!"MaxInternalCount", i64 1}
162+
!6 = !{!"MaxFunctionCount", i64 1000}
163+
!7 = !{!"NumCounts", i64 3}
164+
!8 = !{!"NumFunctions", i64 3}
165+
!9 = !{!"DetailedSummary", !10}
166+
!10 = !{!11, !12, !13}
167+
!11 = !{i32 10000, i64 100, i32 1}
168+
!12 = !{i32 999000, i64 100, i32 1}
169+
!13 = !{i32 999999, i64 1, i32 2}
170+
171+
!14 = !{!"function_entry_count", i64 100}
172+
!15 = !{!"branch_weights", i32 0, i32 1}
173+
!16 = !{!"branch_weights", i32 1, i32 1}
174+
!17 = !{!"branch_weights", i32 0, i32 0}
175+
; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
176+
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
177+
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
178+
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}

0 commit comments

Comments
 (0)