Skip to content

Commit ac8da31

Browse files
committed
[PGO][PGSO] Handle MBFIWrapper
Some code gen passes use MBFIWrapper to keep track of the frequency of new blocks. This was not taken into account and could lead to incorrect frequencies as MBFI silently returns zero frequency for unknown/new blocks. Add a variant for MBFIWrapper in the PGSO query interface. Depends on D73494.
1 parent 2a1b5af commit ac8da31

File tree

10 files changed

+134
-16
lines changed

10 files changed

+134
-16
lines changed

llvm/include/llvm/CodeGen/MachineSizeOpts.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class ProfileSummaryInfo;
2121
class MachineBasicBlock;
2222
class MachineBlockFrequencyInfo;
2323
class MachineFunction;
24+
class MBFIWrapper;
2425

2526
/// Returns true if machine function \p MF is suggested to be size-optimized
2627
/// based on the profile.
@@ -33,6 +34,12 @@ bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
3334
ProfileSummaryInfo *PSI,
3435
const MachineBlockFrequencyInfo *MBFI,
3536
PGSOQueryType QueryType = PGSOQueryType::Other);
37+
/// Returns true if machine basic block \p MBB is suggested to be size-optimized
38+
/// based on the profile.
39+
bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
40+
ProfileSummaryInfo *PSI,
41+
MBFIWrapper *MBFIWrapper,
42+
PGSOQueryType QueryType = PGSOQueryType::Other);
3643

3744
} // end namespace llvm
3845

llvm/include/llvm/CodeGen/TailDuplicator.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/DenseSet.h"
1919
#include "llvm/ADT/SetVector.h"
2020
#include "llvm/ADT/SmallVector.h"
21+
#include "llvm/CodeGen/MBFIWrapper.h"
2122
#include "llvm/CodeGen/TargetInstrInfo.h"
2223
#include <utility>
2324
#include <vector>
@@ -42,7 +43,7 @@ class TailDuplicator {
4243
const MachineModuleInfo *MMI;
4344
MachineRegisterInfo *MRI;
4445
MachineFunction *MF;
45-
const MachineBlockFrequencyInfo *MBFI;
46+
MBFIWrapper *MBFI;
4647
ProfileSummaryInfo *PSI;
4748
bool PreRegAlloc;
4849
bool LayoutMode;
@@ -69,7 +70,7 @@ class TailDuplicator {
6970
/// default implies using the command line value TailDupSize.
7071
void initMF(MachineFunction &MF, bool PreRegAlloc,
7172
const MachineBranchProbabilityInfo *MBPI,
72-
const MachineBlockFrequencyInfo *MBFI,
73+
MBFIWrapper *MBFI,
7374
ProfileSummaryInfo *PSI,
7475
bool LayoutMode, unsigned TailDupSize = 0);
7576

llvm/include/llvm/Transforms/Utils/SizeOpts.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,9 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
6363
F, PSI, *BFI);
6464
}
6565

66-
template<typename AdapterT, typename BlockT, typename BFIT>
67-
bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
66+
template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
67+
bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
6868
BFIT *BFI, PGSOQueryType QueryType) {
69-
assert(BB);
7069
if (!PSI || !BFI || !PSI->hasProfileSummary())
7170
return false;
7271
if (ForcePGSO)
@@ -81,11 +80,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
8180
if (PGSOColdCodeOnly ||
8281
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
8382
// Even if the working set size isn't large, size-optimize cold code.
84-
return AdapterT::isColdBlock(BB, PSI, BFI);
83+
return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
8584
}
8685
return !AdapterT::isHotBlockNthPercentile(
8786
PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
88-
BB, PSI, BFI);
87+
BBOrBlockFreq, PSI, BFI);
8988
}
9089

9190
/// Returns true if function \p F is suggested to be size-optimized based on the

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -655,8 +655,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
655655
MachineFunction *MF = MBB1->getParent();
656656
bool OptForSize =
657657
MF->getFunction().hasOptSize() ||
658-
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
659-
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
658+
(llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
659+
llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
660660
return EffectiveTailLen >= 2 && OptForSize &&
661661
(FullBlockTail1 || FullBlockTail2);
662662
}
@@ -1511,7 +1511,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
15111511

15121512
bool OptForSize =
15131513
MF.getFunction().hasOptSize() ||
1514-
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
1514+
llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
15151515
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
15161516
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
15171517
// direction, thereby defeating careful block placement and regressing

llvm/lib/CodeGen/MachineBlockPlacement.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,8 +2082,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
20822082
// In practice this never happens though: there always seems to be a preheader
20832083
// that can fallthrough and that is also placed before the header.
20842084
bool OptForSize = F->getFunction().hasOptSize() ||
2085-
llvm::shouldOptimizeForSize(L.getHeader(), PSI,
2086-
&MBFI->getMBFI());
2085+
llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
20872086
if (OptForSize)
20882087
return L.getHeader();
20892088

@@ -2841,7 +2840,7 @@ void MachineBlockPlacement::alignBlocks() {
28412840
continue;
28422841

28432842
// If the global profiles indicates so, don't align it.
2844-
if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
2843+
if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
28452844
!TLI->alignLoopsWithOptSize())
28462845
continue;
28472846

@@ -3088,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
30883087
if (OptForSize)
30893088
TailDupSize = 1;
30903089
bool PreRegAlloc = false;
3091-
TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
3090+
TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
30923091
/* LayoutMode */ true, TailDupSize);
30933092
precomputeTriangleChains();
30943093
}

llvm/lib/CodeGen/MachineSizeOpts.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "llvm/CodeGen/MachineSizeOpts.h"
15+
#include "llvm/CodeGen/MBFIWrapper.h"
1516
#include "llvm/Analysis/ProfileSummaryInfo.h"
1617
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
1718

@@ -33,6 +34,13 @@ bool isColdBlock(const MachineBasicBlock *MBB,
3334
return Count && PSI->isColdCount(*Count);
3435
}
3536

37+
bool isColdBlock(BlockFrequency BlockFreq,
38+
ProfileSummaryInfo *PSI,
39+
const MachineBlockFrequencyInfo *MBFI) {
40+
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
41+
return Count && PSI->isColdCount(*Count);
42+
}
43+
3644
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
3745
static bool isHotBlockNthPercentile(int PercentileCutoff,
3846
const MachineBasicBlock *MBB,
@@ -42,6 +50,14 @@ static bool isHotBlockNthPercentile(int PercentileCutoff,
4250
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
4351
}
4452

53+
static bool isHotBlockNthPercentile(int PercentileCutoff,
54+
BlockFrequency BlockFreq,
55+
ProfileSummaryInfo *PSI,
56+
const MachineBlockFrequencyInfo *MBFI) {
57+
auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
58+
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
59+
}
60+
4561
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
4662
/// MachineFunction.
4763
bool isFunctionColdInCallGraph(
@@ -95,13 +111,25 @@ struct MachineBasicBlockBFIAdapter {
95111
const MachineBlockFrequencyInfo *MBFI) {
96112
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
97113
}
114+
static bool isColdBlock(BlockFrequency BlockFreq,
115+
ProfileSummaryInfo *PSI,
116+
const MachineBlockFrequencyInfo *MBFI) {
117+
return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
118+
}
98119
static bool isHotBlockNthPercentile(int CutOff,
99120
const MachineBasicBlock *MBB,
100121
ProfileSummaryInfo *PSI,
101122
const MachineBlockFrequencyInfo *MBFI) {
102123
return machine_size_opts_detail::isHotBlockNthPercentile(
103124
CutOff, MBB, PSI, MBFI);
104125
}
126+
static bool isHotBlockNthPercentile(int CutOff,
127+
BlockFrequency BlockFreq,
128+
ProfileSummaryInfo *PSI,
129+
const MachineBlockFrequencyInfo *MBFI) {
130+
return machine_size_opts_detail::isHotBlockNthPercentile(
131+
CutOff, BlockFreq, PSI, MBFI);
132+
}
105133
};
106134
} // end anonymous namespace
107135

@@ -117,6 +145,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
117145
ProfileSummaryInfo *PSI,
118146
const MachineBlockFrequencyInfo *MBFI,
119147
PGSOQueryType QueryType) {
148+
assert(MBB);
120149
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
121150
MBB, PSI, MBFI, QueryType);
122151
}
152+
153+
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
154+
ProfileSummaryInfo *PSI,
155+
MBFIWrapper *MBFIW,
156+
PGSOQueryType QueryType) {
157+
assert(MBB);
158+
if (!PSI || !MBFIW)
159+
return false;
160+
BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
161+
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
162+
BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
163+
}

llvm/lib/CodeGen/TailDuplication.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ namespace {
3131

3232
class TailDuplicateBase : public MachineFunctionPass {
3333
TailDuplicator Duplicator;
34+
std::unique_ptr<MBFIWrapper> MBFIW;
3435
bool PreRegAlloc;
3536
public:
3637
TailDuplicateBase(char &PassID, bool PreRegAlloc)
@@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
8889
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
8990
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
9091
nullptr;
91-
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
92+
if (MBFI)
93+
MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
94+
Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
95+
/*LayoutMode=*/false);
9296

9397
bool MadeChange = false;
9498
while (Duplicator.tailDuplicateBlocks())

llvm/lib/CodeGen/TailDuplicator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
8080

8181
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
8282
const MachineBranchProbabilityInfo *MBPIin,
83-
const MachineBlockFrequencyInfo *MBFIin,
83+
MBFIWrapper *MBFIin,
8484
ProfileSummaryInfo *PSIin,
8585
bool LayoutModeIn, unsigned TailDupSizeIn) {
8686
MF = &MFin;

llvm/lib/Transforms/Utils/SizeOpts.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
8484
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
8585
BlockFrequencyInfo *BFI,
8686
PGSOQueryType QueryType) {
87+
assert(BB);
8788
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
8889
QueryType);
8990
}

llvm/test/CodeGen/X86/tail-opts.ll

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,72 @@ cont4:
846846
ret void
847847
}
848848

849+
; This triggers a situation where a new block (bb4 is split) is created and then
850+
; would be passed to the PGSO interface llvm::shouldOptimizeForSize().
851+
@GV = global i32 0
852+
define void @bfi_new_block_pgso(i32 %c) nounwind {
853+
; CHECK-LABEL: bfi_new_block_pgso:
854+
; CHECK: # %bb.0: # %entry
855+
; CHECK-NEXT: testl %edi, %edi
856+
; CHECK-NEXT: je .LBB14_4
857+
; CHECK-NEXT: # %bb.1: # %bb1
858+
; CHECK-NEXT: pushq %rax
859+
; CHECK-NEXT: cmpl $16, %edi
860+
; CHECK-NEXT: je .LBB14_6
861+
; CHECK-NEXT: # %bb.2: # %bb1
862+
; CHECK-NEXT: cmpl $17, %edi
863+
; CHECK-NEXT: je .LBB14_7
864+
; CHECK-NEXT: # %bb.3: # %bb4
865+
; CHECK-NEXT: popq %rax
866+
; CHECK-NEXT: jmp tail_call_me # TAILCALL
867+
; CHECK-NEXT: .LBB14_4: # %bb5
868+
; CHECK-NEXT: cmpl $128, %edi
869+
; CHECK-NEXT: jne .LBB14_8
870+
; CHECK-NEXT: # %bb.5: # %return
871+
; CHECK-NEXT: retq
872+
; CHECK-NEXT: .LBB14_6: # %bb3
873+
; CHECK-NEXT: movl $0, {{.*}}(%rip)
874+
; CHECK-NEXT: .LBB14_7: # %bb4
875+
; CHECK-NEXT: callq func
876+
; CHECK-NEXT: popq %rax
877+
; CHECK-NEXT: .LBB14_8: # %bb6
878+
; CHECK-NEXT: jmp tail_call_me # TAILCALL
879+
entry:
880+
%0 = icmp eq i32 %c, 0
881+
br i1 %0, label %bb5, label %bb1
882+
883+
bb1:
884+
switch i32 %c, label %bb4 [
885+
i32 16, label %bb3
886+
i32 17, label %bb2
887+
]
888+
889+
bb2:
890+
call void @func()
891+
br label %bb4
892+
893+
bb3:
894+
store i32 0, i32* @GV
895+
call void @func()
896+
br label %bb4
897+
898+
bb4:
899+
tail call void @tail_call_me()
900+
br label %return
901+
902+
bb5:
903+
switch i32 %c, label %bb6 [
904+
i32 128, label %return
905+
]
906+
907+
bb6:
908+
tail call void @tail_call_me()
909+
br label %return
910+
911+
return:
912+
ret void
913+
}
914+
849915
!llvm.module.flags = !{!0}
850916
!0 = !{i32 1, !"ProfileSummary", !1}
851917
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}

0 commit comments

Comments
 (0)