Skip to content

Commit 2c3f0e5

Browse files
authored
[CodeGen] Preserve branch weights from PGO profile during instruction selection at -O0 (llvm#161620)
Branch probabilities from PGO profile data were not preserved during instruction selection at -O0 because BranchProbabilityInfo was only requested when OptLevel != None.
1 parent b6fbf66 commit 2c3f0e5

File tree

3 files changed

+113
-8
lines changed

3 files changed

+113
-8
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,19 @@ static bool dontUseFastISelFor(const Function &Fn) {
234234
});
235235
}
236236

237+
static bool maintainPGOProfile(const TargetMachine &TM,
238+
CodeGenOptLevel OptLevel) {
239+
if (OptLevel != CodeGenOptLevel::None)
240+
return true;
241+
if (TM.getPGOOption()) {
242+
const PGOOptions &Options = *TM.getPGOOption();
243+
return Options.Action == PGOOptions::PGOAction::IRUse ||
244+
Options.Action == PGOOptions::PGOAction::SampleUse ||
245+
Options.CSAction == PGOOptions::CSPGOAction::CSIRUse;
246+
}
247+
return false;
248+
}
249+
237250
namespace llvm {
238251

239252
//===--------------------------------------------------------------------===//
@@ -395,6 +408,7 @@ SelectionDAGISel::~SelectionDAGISel() { delete CurDAG; }
395408

396409
void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
397410
CodeGenOptLevel OptLevel = Selector->OptLevel;
411+
bool RegisterPGOPasses = maintainPGOProfile(Selector->TM, Selector->OptLevel);
398412
if (OptLevel != CodeGenOptLevel::None)
399413
AU.addRequired<AAResultsWrapperPass>();
400414
AU.addRequired<GCModuleInfo>();
@@ -403,15 +417,15 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
403417
AU.addRequired<TargetLibraryInfoWrapperPass>();
404418
AU.addRequired<TargetTransformInfoWrapperPass>();
405419
AU.addRequired<AssumptionCacheTracker>();
406-
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
407-
AU.addRequired<BranchProbabilityInfoWrapperPass>();
420+
if (UseMBPI && RegisterPGOPasses)
421+
AU.addRequired<BranchProbabilityInfoWrapperPass>();
408422
AU.addRequired<ProfileSummaryInfoWrapperPass>();
409423
// AssignmentTrackingAnalysis only runs if assignment tracking is enabled for
410424
// the module.
411425
AU.addRequired<AssignmentTrackingAnalysis>();
412426
AU.addPreserved<AssignmentTrackingAnalysis>();
413-
if (OptLevel != CodeGenOptLevel::None)
414-
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
427+
if (RegisterPGOPasses)
428+
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
415429
MachineFunctionPass::getAnalysisUsage(AU);
416430
}
417431

@@ -464,6 +478,7 @@ void SelectionDAGISel::initializeAnalysisResults(
464478
(void)MatchFilterFuncName;
465479
#endif
466480

481+
bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel);
467482
TII = MF->getSubtarget().getInstrInfo();
468483
TLI = MF->getSubtarget().getTargetLowering();
469484
RegInfo = &MF->getRegInfo();
@@ -474,7 +489,7 @@ void SelectionDAGISel::initializeAnalysisResults(
474489
auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent());
475490
BlockFrequencyInfo *BFI = nullptr;
476491
FAM.getResult<BlockFrequencyAnalysis>(Fn);
477-
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
492+
if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses)
478493
BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn);
479494

480495
FunctionVarLocs const *FnVarLocs = nullptr;
@@ -492,7 +507,7 @@ void SelectionDAGISel::initializeAnalysisResults(
492507
// into account). That's unfortunate but OK because it just means we won't
493508
// ask for passes that have been required anyway.
494509

495-
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
510+
if (UseMBPI && RegisterPGOPasses)
496511
FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn);
497512
else
498513
FuncInfo->BPI = nullptr;
@@ -518,6 +533,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
518533
(void)MatchFilterFuncName;
519534
#endif
520535

536+
bool RegisterPGOPasses = maintainPGOProfile(TM, OptLevel);
521537
TII = MF->getSubtarget().getInstrInfo();
522538
TLI = MF->getSubtarget().getTargetLowering();
523539
RegInfo = &MF->getRegInfo();
@@ -528,7 +544,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
528544
AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn);
529545
auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
530546
BlockFrequencyInfo *BFI = nullptr;
531-
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None)
547+
if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses)
532548
BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI();
533549

534550
FunctionVarLocs const *FnVarLocs = nullptr;
@@ -549,7 +565,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) {
549565
// into account). That's unfortunate but OK because it just means we won't
550566
// ask for passes that have been required anyway.
551567

552-
if (UseMBPI && OptLevel != CodeGenOptLevel::None)
568+
if (UseMBPI && RegisterPGOPasses)
553569
FuncInfo->BPI =
554570
&MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
555571
else
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=PASSES
2+
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-only=branch-prob %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=BRANCH_PROB
3+
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -stop-after=finalize-isel %s -o - | FileCheck %s --check-prefix=MIR
4+
5+
; REQUIRES: asserts
6+
7+
; This test verifies that PGO profile information (branch weights) is preserved
8+
; during instruction selection at -O0.
9+
10+
; Test function with explicit branch weights from PGO.
11+
define i32 @test_pgo_preservation(i32 %x) !prof !15 {
12+
entry:
13+
%cmp = icmp sgt i32 %x, 10
14+
; This branch has bias: 97 taken vs 3 not taken
15+
br i1 %cmp, label %if.then, label %if.else, !prof !16
16+
17+
if.then:
18+
; Hot path - should have high frequency
19+
%add = add nsw i32 %x, 100
20+
br label %if.end
21+
22+
if.else:
23+
; Cold path - should have low frequency
24+
%sub = sub nsw i32 %x, 50
25+
br label %if.end
26+
27+
if.end:
28+
%result = phi i32 [ %add, %if.then ], [ %sub, %if.else ]
29+
ret i32 %result
30+
}
31+
32+
; Profile metadata with branch weights 97:3.
33+
!15 = !{!"function_entry_count", i64 100}
34+
!16 = !{!"branch_weights", i32 97, i32 3}
35+
36+
; Verify that Branch Probability Analysis runs at O0.
37+
; PASSES: Branch Probability Analysis
38+
39+
; Verify that the branch probabilities reflect the exact profile data.
40+
; BRANCH_PROB: ---- Branch Probability Info : test_pgo_preservation ----
41+
; BRANCH_PROB: set edge entry -> 0 successor probability to {{.*}} = 97.00%
42+
; BRANCH_PROB: set edge entry -> 1 successor probability to {{.*}} = 3.00%
43+
44+
; Verify that machine IR preserves the branch probabilities from profile data
45+
; MIR: bb.0.entry:
46+
; MIR-NEXT: successors: %bb.{{[0-9]+}}({{0x03d70a3d|0x7c28f5c3}}), %bb.{{[0-9]+}}({{0x7c28f5c3|0x03d70a3d}})
47+
; The two successor probability values should be:
48+
; - 0x7c28f5c3: approximately 97% (high probability successor)
49+
; - 0x03d70a3d: approximately 3% (low probability successor)

llvm/tools/llc/llc.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/Support/FileSystem.h"
4545
#include "llvm/Support/FormattedStream.h"
4646
#include "llvm/Support/InitLLVM.h"
47+
#include "llvm/Support/PGOOptions.h"
4748
#include "llvm/Support/PluginLoader.h"
4849
#include "llvm/Support/SourceMgr.h"
4950
#include "llvm/Support/TargetSelect.h"
@@ -243,6 +244,39 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass(
243244
cl::desc("Run compiler only for specified passes (comma separated list)"),
244245
cl::value_desc("pass-name"), cl::location(RunPassOpt));
245246

247+
// PGO command line options
248+
enum PGOKind {
249+
NoPGO,
250+
SampleUse,
251+
};
252+
253+
static cl::opt<PGOKind>
254+
PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden,
255+
cl::desc("The kind of profile guided optimization"),
256+
cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."),
257+
clEnumValN(SampleUse, "pgo-sample-use-pipeline",
258+
"Use sampled profile to guide PGO.")));
259+
260+
// Function to set PGO options on TargetMachine based on command line flags.
261+
static void setPGOOptions(TargetMachine &TM) {
262+
std::optional<PGOOptions> PGOOpt;
263+
264+
switch (PGOKindFlag) {
265+
case SampleUse:
266+
// Use default values for other PGOOptions parameters. This parameter
267+
// is used to test that PGO data is preserved at -O0.
268+
PGOOpt = PGOOptions("", "", "", "", PGOOptions::SampleUse,
269+
PGOOptions::NoCSAction);
270+
break;
271+
case NoPGO:
272+
PGOOpt = std::nullopt;
273+
break;
274+
}
275+
276+
if (PGOOpt)
277+
TM.setPGOOption(PGOOpt);
278+
}
279+
246280
static int compileModule(char **, LLVMContext &);
247281

248282
[[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") {
@@ -558,6 +592,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
558592
TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl));
559593
assert(Target && "Could not allocate target machine!");
560594

595+
// Set PGO options based on command line flags
596+
setPGOOptions(*Target);
597+
561598
return Target->createDataLayout().getStringRepresentation();
562599
};
563600
if (InputLanguage == "mir" ||
@@ -601,6 +638,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
601638
TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl));
602639
assert(Target && "Could not allocate target machine!");
603640

641+
// Set PGO options based on command line flags
642+
setPGOOptions(*Target);
643+
604644
// If we don't have a module then just exit now. We do this down
605645
// here since the CPU/Feature help is underneath the target machine
606646
// creation.

0 commit comments

Comments
 (0)