Skip to content

Commit 563c0e1

Browse files
committed
[PGO] Supporting code for always instrumenting loop entries
This patch extends the PGO infrastructure with an option to prefer the instrumentation of loop entry blocks. This option is a generalization of 19fb5b4, and helps to cover cases where the loop exit is never executed. Event handling loops are one example where this can occur. Note that this change does NOT alter the default behavior.
1 parent abac5be commit 563c0e1

File tree

4 files changed

+129
-25
lines changed

4 files changed

+129
-25
lines changed

llvm/include/llvm/Transforms/Instrumentation/CFGMST.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Analysis/BlockFrequencyInfo.h"
2020
#include "llvm/Analysis/BranchProbabilityInfo.h"
2121
#include "llvm/Analysis/CFG.h"
22+
#include "llvm/Analysis/LoopInfo.h"
2223
#include "llvm/IR/Instructions.h"
2324
#include "llvm/IR/IntrinsicInst.h"
2425
#include "llvm/Support/BranchProbability.h"
@@ -52,10 +53,14 @@ template <class Edge, class BBInfo> class CFGMST {
5253

5354
BranchProbabilityInfo *const BPI;
5455
BlockFrequencyInfo *const BFI;
56+
LoopInfo *const LI;
5557

5658
// If function entry will be always instrumented.
5759
const bool InstrumentFuncEntry;
5860

61+
// If true, loop entries will always be instrumented.
62+
const bool InstrumentLoopEntries;
63+
5964
// Find the root group of the G and compress the path from G to the root.
6065
BBInfo *findAndCompressGroup(BBInfo *G) {
6166
if (G->Group != G)
@@ -154,6 +159,11 @@ template <class Edge, class BBInfo> class CFGMST {
154159
}
155160
if (BPI != nullptr)
156161
Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
162+
// If InstrumentLoopEntries is on and TargetBB is a loop head (i.e.,
163+
// the current edge leads to a loop), set Weight to be minimal, so
164+
// that the edge won't be chosen for the MST and will be instrumented.
165+
if (InstrumentLoopEntries && LI->isLoopHeader(TargetBB))
166+
Weight = 0;
157167
if (Weight == 0)
158168
Weight++;
159169
auto *E = &addEdge(&BB, TargetBB, Weight);
@@ -291,10 +301,14 @@ template <class Edge, class BBInfo> class CFGMST {
291301
return *AllEdges.back();
292302
}
293303

294-
CFGMST(Function &Func, bool InstrumentFuncEntry,
304+
CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries,
295305
BranchProbabilityInfo *BPI = nullptr,
296-
BlockFrequencyInfo *BFI = nullptr)
297-
: F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
306+
BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr)
307+
: F(Func), BPI(BPI), BFI(BFI), LI(LI),
308+
InstrumentFuncEntry(InstrumentFuncEntry),
309+
InstrumentLoopEntries(InstrumentLoopEntries) {
310+
assert(!(InstrumentLoopEntries && !LI) &&
311+
"expected a LoopInfo to instrumenting loop entries");
298312
buildEdges();
299313
sortEdgesByWeight();
300314
computeMinimumSpanningTree();

llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,8 @@ bool GCOVProfiler::emitProfileNotes(
820820
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
821821
BFI);
822822

823-
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
823+
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false,
824+
/*InstrumentLoopEntries=*/false, BPI, BFI);
824825

825826
// getInstrBB can split basic blocks and push elements to AllEdges.
826827
for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ static cl::opt<bool> PGOInstrumentEntry(
259259
"pgo-instrument-entry", cl::init(false), cl::Hidden,
260260
cl::desc("Force to instrument function entry basicblock."));
261261

262+
static cl::opt<bool>
263+
PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
264+
cl::Hidden,
265+
cl::desc("Force to instrument loop entries."));
266+
262267
static cl::opt<bool> PGOFunctionEntryCoverage(
263268
"pgo-function-entry-coverage", cl::Hidden,
264269
cl::desc(
@@ -359,6 +364,7 @@ class FunctionInstrumenter final {
359364
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
360365
BranchProbabilityInfo *const BPI;
361366
BlockFrequencyInfo *const BFI;
367+
LoopInfo *const LI;
362368

363369
const PGOInstrumentationType InstrumentationType;
364370

@@ -376,14 +382,17 @@ class FunctionInstrumenter final {
376382
InstrumentationType == PGOInstrumentationType::CTXPROF;
377383
}
378384

385+
bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
386+
379387
public:
380388
FunctionInstrumenter(
381389
Module &M, Function &F, TargetLibraryInfo &TLI,
382390
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
383391
BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
392+
LoopInfo *LI = nullptr,
384393
PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
385394
: M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
386-
InstrumentationType(InstrumentationType) {}
395+
LI(LI), InstrumentationType(InstrumentationType) {}
387396

388397
void instrument();
389398
};
@@ -625,12 +634,13 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
625634
Function &Func, TargetLibraryInfo &TLI,
626635
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
627636
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
628-
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
629-
bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
637+
BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
638+
bool IsCS = false, bool InstrumentFuncEntry = true,
639+
bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
630640
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
631641
TLI(TLI), ValueSites(IPVK_Last + 1),
632642
SIVisitor(Func, HasSingleByteCoverage),
633-
MST(F, InstrumentFuncEntry, BPI, BFI),
643+
MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
634644
BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
635645
if (BCI && PGOViewBlockCoverageGraph)
636646
BCI->viewBlockCoverageGraph();
@@ -916,9 +926,10 @@ void FunctionInstrumenter::instrument() {
916926

917927
const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
918928
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
919-
F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI,
929+
F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
920930
InstrumentationType == PGOInstrumentationType::CSFDO,
921-
shouldInstrumentEntryBB(), PGOBlockCoverage);
931+
shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
932+
PGOBlockCoverage);
922933

923934
auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
924935
auto *const CFGHash =
@@ -1136,11 +1147,13 @@ class PGOUseFunc {
11361147
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
11371148
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
11381149
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1139-
ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1150+
LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1151+
bool InstrumentFuncEntry, bool InstrumentLoopEntries,
11401152
bool HasSingleByteCoverage)
11411153
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1142-
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1143-
InstrumentFuncEntry, HasSingleByteCoverage),
1154+
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1155+
InstrumentFuncEntry, InstrumentLoopEntries,
1156+
HasSingleByteCoverage),
11441157
FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
11451158

11461159
void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
@@ -1923,6 +1936,7 @@ static bool InstrumentAllFunctions(
19231936
Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
19241937
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
19251938
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1939+
function_ref<LoopInfo *(Function &)> LookupLI,
19261940
PGOInstrumentationType InstrumentationType) {
19271941
// For the context-sensitve instrumentation, we should have a separated pass
19281942
// (before LTO/ThinLTO linking) to create these variables.
@@ -1943,10 +1957,11 @@ static bool InstrumentAllFunctions(
19431957
for (auto &F : M) {
19441958
if (skipPGOGen(F))
19451959
continue;
1946-
auto &TLI = LookupTLI(F);
1947-
auto *BPI = LookupBPI(F);
1948-
auto *BFI = LookupBFI(F);
1949-
FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
1960+
TargetLibraryInfo &TLI = LookupTLI(F);
1961+
BranchProbabilityInfo *BPI = LookupBPI(F);
1962+
BlockFrequencyInfo *BFI = LookupBFI(F);
1963+
LoopInfo *LI = LookupLI(F);
1964+
FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
19501965
InstrumentationType);
19511966
FI.instrument();
19521967
}
@@ -1980,8 +1995,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
19801995
auto LookupBFI = [&FAM](Function &F) {
19811996
return &FAM.getResult<BlockFrequencyAnalysis>(F);
19821997
};
1998+
auto LookupLI = [&FAM](Function &F) {
1999+
return &FAM.getResult<LoopAnalysis>(F);
2000+
};
19832001

1984-
if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
2002+
if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
19852003
InstrumentationType))
19862004
return PreservedAnalyses::all();
19872005

@@ -2116,7 +2134,8 @@ static bool annotateAllFunctions(
21162134
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
21172135
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
21182136
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2119-
ProfileSummaryInfo *PSI, bool IsCS) {
2137+
function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
2138+
bool IsCS) {
21202139
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
21212140
auto &Ctx = M.getContext();
21222141
// Read the counter array from file.
@@ -2181,22 +2200,26 @@ static bool annotateAllFunctions(
21812200
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
21822201
if (PGOInstrumentEntry.getNumOccurrences() > 0)
21832202
InstrumentFuncEntry = PGOInstrumentEntry;
2203+
bool InstrumentLoopEntries =
2204+
(PGOInstrumentLoopEntries.getNumOccurrences() > 0);
21842205

21852206
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
21862207
for (auto &F : M) {
21872208
if (skipPGOUse(F))
21882209
continue;
2189-
auto &TLI = LookupTLI(F);
2190-
auto *BPI = LookupBPI(F);
2191-
auto *BFI = LookupBFI(F);
2210+
TargetLibraryInfo &TLI = LookupTLI(F);
2211+
BranchProbabilityInfo *BPI = LookupBPI(F);
2212+
BlockFrequencyInfo *BFI = LookupBFI(F);
2213+
LoopInfo *LI = LookupLI(F);
21922214
if (!HasSingleByteCoverage) {
21932215
// Split indirectbr critical edges here before computing the MST rather
21942216
// than later in getInstrBB() to avoid invalidating it.
21952217
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
21962218
BFI);
21972219
}
2198-
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2199-
InstrumentFuncEntry, HasSingleByteCoverage);
2220+
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2221+
InstrumentFuncEntry, InstrumentLoopEntries,
2222+
HasSingleByteCoverage);
22002223
if (HasSingleByteCoverage) {
22012224
Func.populateCoverage(PGOReader.get());
22022225
continue;
@@ -2335,10 +2358,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
23352358
auto LookupBFI = [&FAM](Function &F) {
23362359
return &FAM.getResult<BlockFrequencyAnalysis>(F);
23372360
};
2361+
auto LookupLI = [&FAM](Function &F) {
2362+
return &FAM.getResult<LoopAnalysis>(F);
2363+
};
23382364

23392365
auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
23402366
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2341-
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2367+
LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2368+
IsCS))
23422369
return PreservedAnalyses::all();
23432370

23442371
return PreservedAnalyses::none();
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=false -S | FileCheck %s --check-prefixes=GEN,NOTLOOPENTRIES
2+
; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=true -S | FileCheck %s --check-prefixes=GEN,LOOPENTRIES
3+
; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-entry=true -S | FileCheck %s --check-prefixes=GEN,FUNCTIONENTRY
4+
5+
; GEN: $__llvm_profile_raw_version = comdat any
6+
; GEN: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
7+
; GEN: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass"
8+
9+
define i32 @test_simple_for_with_bypass(i32 %n) {
10+
entry:
11+
; GEN: entry:
12+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
13+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
14+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
15+
%mask = and i32 %n, 65535
16+
%skip = icmp eq i32 %mask, 0
17+
br i1 %skip, label %end, label %for.entry
18+
19+
for.entry:
20+
; GEN: for.entry:
21+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
22+
; NOTLOOPENTRIES-NOT: call void @llvm.instrprof.increment
23+
; FUNCTIONENTRY-NOT: call void @llvm.instrprof.increment
24+
br label %for.cond
25+
26+
for.cond:
27+
; GEN: for.cond:
28+
; GEN-NOT: call void @llvm.instrprof.increment
29+
%i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ]
30+
%sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ]
31+
%cmp = icmp slt i32 %i, %n
32+
br i1 %cmp, label %for.body, label %for.end, !prof !1
33+
34+
for.body:
35+
; GEN: for.body:
36+
; GEN-NOT: call void @llvm.instrprof.increment
37+
%inc = add nsw i32 %sum, 1
38+
br label %for.inc
39+
40+
for.inc:
41+
; GEN: for.inc:
42+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
43+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
44+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
45+
%inc1 = add nsw i32 %i, 1
46+
br label %for.cond
47+
48+
for.end:
49+
; GEN: for.end:
50+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
51+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
52+
; LOOPENTRIES-NOT: call void @llvm.instrprof.increment
53+
br label %end
54+
55+
end:
56+
; GEN: end:
57+
; GEN-NOT: call void @llvm.instrprof.increment
58+
%final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ]
59+
ret i32 %final_sum
60+
}
61+
62+
!1 = !{!"branch_weights", i32 100000, i32 80}

0 commit comments

Comments
 (0)