Skip to content

Commit 99c6f87

Browse files
committed
[PGO] Supporting code for always instrumenting loop entries
This patch includes the supporting code for always instrumenting loop entry blocks (groundwork for enabling this by default). It is a generalization of 19fb5b4. In some cases loop exit edges may never get a chance to execute (for instance, when the program is an event-handling loop), so it may be preferable to instrument the loop entry edges instead. This patch does NOT change the default behavior.
1 parent 7f9d348 commit 99c6f87

File tree

4 files changed

+125
-18
lines changed

4 files changed

+125
-18
lines changed

llvm/include/llvm/Transforms/Instrumentation/CFGMST.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/Analysis/BlockFrequencyInfo.h"
2020
#include "llvm/Analysis/BranchProbabilityInfo.h"
2121
#include "llvm/Analysis/CFG.h"
22+
#include "llvm/Analysis/LoopInfo.h"
2223
#include "llvm/IR/Instructions.h"
2324
#include "llvm/IR/IntrinsicInst.h"
2425
#include "llvm/Support/BranchProbability.h"
@@ -52,10 +53,14 @@ template <class Edge, class BBInfo> class CFGMST {
5253

5354
BranchProbabilityInfo *const BPI;
5455
BlockFrequencyInfo *const BFI;
56+
LoopInfo *const LI;
5557

5658
// If true, the function entry will always be instrumented.
5759
const bool InstrumentFuncEntry;
5860

61+
// If true, loop entries will always be instrumented.
62+
const bool InstrumentLoopEntries;
63+
5964
// Find the root group of the G and compress the path from G to the root.
6065
BBInfo *findAndCompressGroup(BBInfo *G) {
6166
if (G->Group != G)
@@ -154,6 +159,9 @@ template <class Edge, class BBInfo> class CFGMST {
154159
}
155160
if (BPI != nullptr)
156161
Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
162+
if (InstrumentLoopEntries && LI != nullptr &&
163+
LI->isLoopHeader(TargetBB))
164+
Weight = 0;
157165
if (Weight == 0)
158166
Weight++;
159167
auto *E = &addEdge(&BB, TargetBB, Weight);
@@ -291,10 +299,12 @@ template <class Edge, class BBInfo> class CFGMST {
291299
return *AllEdges.back();
292300
}
293301

294-
CFGMST(Function &Func, bool InstrumentFuncEntry,
302+
CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries,
295303
BranchProbabilityInfo *BPI = nullptr,
296-
BlockFrequencyInfo *BFI = nullptr)
297-
: F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
304+
BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr)
305+
: F(Func), BPI(BPI), BFI(BFI), LI(LI),
306+
InstrumentFuncEntry(InstrumentFuncEntry),
307+
InstrumentLoopEntries(InstrumentLoopEntries) {
298308
buildEdges();
299309
sortEdgesByWeight();
300310
computeMinimumSpanningTree();

llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -820,7 +820,7 @@ bool GCOVProfiler::emitProfileNotes(
820820
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
821821
BFI);
822822

823-
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
823+
CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, /*InstrumentLoopEntries_=*/false, BPI, BFI);
824824

825825
// getInstrBB can split basic blocks and push elements to AllEdges.
826826
for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {

llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,11 @@ static cl::opt<bool> PGOInstrumentEntry(
259259
"pgo-instrument-entry", cl::init(false), cl::Hidden,
260260
cl::desc("Force to instrument function entry basicblock."));
261261

262+
static cl::opt<bool>
263+
PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
264+
cl::Hidden,
265+
cl::desc("Force to instrument loop entries."));
266+
262267
static cl::opt<bool> PGOFunctionEntryCoverage(
263268
"pgo-function-entry-coverage", cl::Hidden,
264269
cl::desc(
@@ -359,6 +364,7 @@ class FunctionInstrumenter final {
359364
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
360365
BranchProbabilityInfo *const BPI;
361366
BlockFrequencyInfo *const BFI;
367+
LoopInfo *const LI;
362368

363369
const PGOInstrumentationType InstrumentationType;
364370

@@ -376,14 +382,17 @@ class FunctionInstrumenter final {
376382
InstrumentationType == PGOInstrumentationType::CTXPROF;
377383
}
378384

385+
// Reports whether loop entries must always be instrumented; this simply
// forwards the value of the -pgo-instrument-loop-entries option.
bool shouldInstrumentLoopEntries() const {
  return PGOInstrumentLoopEntries;
}
386+
379387
public:
380388
FunctionInstrumenter(
381389
Module &M, Function &F, TargetLibraryInfo &TLI,
382390
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
383391
BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
392+
LoopInfo *LI = nullptr,
384393
PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
385394
: M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
386-
InstrumentationType(InstrumentationType) {}
395+
LI(LI), InstrumentationType(InstrumentationType) {}
387396

388397
void instrument();
389398
};
@@ -625,12 +634,13 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
625634
Function &Func, TargetLibraryInfo &TLI,
626635
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
627636
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
628-
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
629-
bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
637+
BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
638+
bool IsCS = false, bool InstrumentFuncEntry = true,
639+
bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
630640
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
631641
TLI(TLI), ValueSites(IPVK_Last + 1),
632642
SIVisitor(Func, HasSingleByteCoverage),
633-
MST(F, InstrumentFuncEntry, BPI, BFI),
643+
MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
634644
BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
635645
if (BCI && PGOViewBlockCoverageGraph)
636646
BCI->viewBlockCoverageGraph();
@@ -916,9 +926,10 @@ void FunctionInstrumenter::instrument() {
916926

917927
const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
918928
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
919-
F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI,
929+
F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
920930
InstrumentationType == PGOInstrumentationType::CSFDO,
921-
shouldInstrumentEntryBB(), PGOBlockCoverage);
931+
shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
932+
PGOBlockCoverage);
922933

923934
auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
924935
auto *const CFGHash =
@@ -1136,11 +1147,13 @@ class PGOUseFunc {
11361147
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
11371148
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
11381149
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1139-
ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1150+
LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
1151+
bool InstrumentFuncEntry, bool InstrumentLoopEntries,
11401152
bool HasSingleByteCoverage)
11411153
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1142-
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1143-
InstrumentFuncEntry, HasSingleByteCoverage),
1154+
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
1155+
InstrumentFuncEntry, InstrumentLoopEntries,
1156+
HasSingleByteCoverage),
11441157
FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
11451158

11461159
void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
@@ -1923,6 +1936,7 @@ static bool InstrumentAllFunctions(
19231936
Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
19241937
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
19251938
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1939+
function_ref<LoopInfo *(Function &)> LookupLI,
19261940
PGOInstrumentationType InstrumentationType) {
19271941
// For the context-sensitive instrumentation, we should have a separate pass
19281942
// (before LTO/ThinLTO linking) to create these variables.
@@ -1946,7 +1960,8 @@ static bool InstrumentAllFunctions(
19461960
auto &TLI = LookupTLI(F);
19471961
auto *BPI = LookupBPI(F);
19481962
auto *BFI = LookupBFI(F);
1949-
FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
1963+
auto *LI = LookupLI(F);
1964+
FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
19501965
InstrumentationType);
19511966
FI.instrument();
19521967
}
@@ -1980,8 +1995,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
19801995
auto LookupBFI = [&FAM](Function &F) {
19811996
return &FAM.getResult<BlockFrequencyAnalysis>(F);
19821997
};
1998+
auto LookupLI = [&FAM](Function &F) {
1999+
return &FAM.getResult<LoopAnalysis>(F);
2000+
};
19832001

1984-
if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
2002+
if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
19852003
InstrumentationType))
19862004
return PreservedAnalyses::all();
19872005

@@ -2116,6 +2134,7 @@ static bool annotateAllFunctions(
21162134
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
21172135
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
21182136
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2137+
function_ref<LoopInfo *(Function &)> LookupLI,
21192138
ProfileSummaryInfo *PSI, bool IsCS) {
21202139
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
21212140
auto &Ctx = M.getContext();
@@ -2181,6 +2200,8 @@ static bool annotateAllFunctions(
21812200
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
21822201
if (PGOInstrumentEntry.getNumOccurrences() > 0)
21832202
InstrumentFuncEntry = PGOInstrumentEntry;
2203+
// Take the flag's value rather than its mere presence on the command line:
// an explicit -pgo-instrument-loop-entries=false must leave loop-entry
// instrumentation disabled. The option is declared with cl::init(false), so
// an unspecified flag still yields false here.
bool InstrumentLoopEntries = PGOInstrumentLoopEntries;
21842205

21852206
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
21862207
for (auto &F : M) {
@@ -2189,14 +2210,16 @@ static bool annotateAllFunctions(
21892210
auto &TLI = LookupTLI(F);
21902211
auto *BPI = LookupBPI(F);
21912212
auto *BFI = LookupBFI(F);
2213+
auto *LI = LookupLI(F);
21922214
if (!HasSingleByteCoverage) {
21932215
// Split indirectbr critical edges here before computing the MST rather
21942216
// than later in getInstrBB() to avoid invalidating it.
21952217
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
21962218
BFI);
21972219
}
2198-
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2199-
InstrumentFuncEntry, HasSingleByteCoverage);
2220+
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
2221+
InstrumentFuncEntry, InstrumentLoopEntries,
2222+
HasSingleByteCoverage);
22002223
if (HasSingleByteCoverage) {
22012224
Func.populateCoverage(PGOReader.get());
22022225
continue;
@@ -2335,10 +2358,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
23352358
auto LookupBFI = [&FAM](Function &F) {
23362359
return &FAM.getResult<BlockFrequencyAnalysis>(F);
23372360
};
2361+
auto LookupLI = [&FAM](Function &F) {
2362+
return &FAM.getResult<LoopAnalysis>(F);
2363+
};
23382364

23392365
auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
23402366
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2341-
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2367+
LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
2368+
IsCS))
23422369
return PreservedAnalyses::all();
23432370

23442371
return PreservedAnalyses::none();
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
; RUN: opt < %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=false -S | FileCheck %s --check-prefixes=GEN,NOTLOOPENTRIES
2+
; RUN: opt < %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=true -S | FileCheck %s --check-prefixes=GEN,LOOPENTRIES
3+
; RUN: opt < %s -passes=pgo-instr-gen -pgo-instrument-entry=true -S | FileCheck %s --check-prefixes=GEN,FUNCTIONENTRY
4+
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
; GEN: $__llvm_profile_raw_version = comdat any
9+
; GEN: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
10+
; GEN: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass"
11+
12+
define i32 @test_simple_for_with_bypass(i32 %n) {
13+
entry:
14+
; GEN: entry:
15+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
16+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
17+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
18+
br label %bypass
19+
20+
bypass:
21+
; GEN: bypass:
22+
; GEN-NOT: call void @llvm.instrprof.increment
23+
%mask = and i32 %n, 65535
24+
%skip = icmp eq i32 %mask, 0
25+
br i1 %skip, label %end, label %for.entry
26+
27+
for.entry:
28+
; GEN: for.entry:
29+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
30+
; NOTLOOPENTRIES-NOT: call void @llvm.instrprof.increment
31+
; FUNCTIONENTRY-NOT: call void @llvm.instrprof.increment
32+
br label %for.cond
33+
34+
for.cond:
35+
; GEN: for.cond:
36+
; GEN-NOT: call void @llvm.instrprof.increment
37+
%i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ]
38+
%sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ]
39+
%cmp = icmp slt i32 %i, %n
40+
br i1 %cmp, label %for.body, label %for.end, !prof !1
41+
42+
for.body:
43+
; GEN: for.body:
44+
; GEN-NOT: call void @llvm.instrprof.increment
45+
%inc = add nsw i32 %sum, 1
46+
br label %for.inc
47+
48+
for.inc:
49+
; GEN: for.inc:
50+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
51+
; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
52+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
53+
%inc1 = add nsw i32 %i, 1
54+
br label %for.cond
55+
56+
for.end:
57+
; GEN: for.end:
58+
; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
59+
; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
60+
; LOOPENTRIES-NOT: call void @llvm.instrprof.increment
61+
br label %end
62+
63+
end:
64+
; GEN: end:
65+
; GEN-NOT: call void @llvm.instrprof.increment
66+
%final_sum = phi i32 [ %sum, %for.end ], [ 0, %bypass ]
67+
ret i32 %final_sum
68+
}
69+
70+
!1 = !{!"branch_weights", i32 100000, i32 80}

0 commit comments

Comments
 (0)