-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[CodeGen] Preserve branch weights from PGO profile during instruction selection at -O0 #161620
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
c55f594
6e4ebda
0af0751
67050dd
293b095
a4773d9
b870ddd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -229,6 +229,17 @@ static bool dontUseFastISelFor(const Function &Fn) { | |
}); | ||
} | ||
|
||
static bool shouldRegisterPGOPasses(const TargetMachine &TM, | ||
CodeGenOptLevel OptLevel) { | ||
bool RegisterPGOPasses = OptLevel != CodeGenOptLevel::None; | ||
|
||
if (!RegisterPGOPasses && TM.getPGOOption()) { | ||
const PGOOptions &Options = *TM.getPGOOption(); | ||
RegisterPGOPasses = Options.Action != PGOOptions::PGOAction::NoAction || | ||
Options.CSAction != PGOOptions::CSPGOAction::NoCSAction; | ||
|
||
} | ||
return RegisterPGOPasses; | ||
} | ||
|
||
namespace llvm { | ||
|
||
//===--------------------------------------------------------------------===// | ||
|
@@ -390,6 +401,8 @@ SelectionDAGISel::~SelectionDAGISel() { delete CurDAG; } | |
|
||
void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { | ||
CodeGenOptLevel OptLevel = Selector->OptLevel; | ||
bool RegisterPGOPasses = | ||
shouldRegisterPGOPasses(Selector->TM, Selector->OptLevel); | ||
if (OptLevel != CodeGenOptLevel::None) | ||
AU.addRequired<AAResultsWrapperPass>(); | ||
AU.addRequired<GCModuleInfo>(); | ||
|
@@ -398,15 +411,15 @@ void SelectionDAGISelLegacy::getAnalysisUsage(AnalysisUsage &AU) const { | |
AU.addRequired<TargetLibraryInfoWrapperPass>(); | ||
AU.addRequired<TargetTransformInfoWrapperPass>(); | ||
AU.addRequired<AssumptionCacheTracker>(); | ||
if (UseMBPI && OptLevel != CodeGenOptLevel::None) | ||
AU.addRequired<BranchProbabilityInfoWrapperPass>(); | ||
if (UseMBPI && RegisterPGOPasses) | ||
AU.addRequired<BranchProbabilityInfoWrapperPass>(); | ||
AU.addRequired<ProfileSummaryInfoWrapperPass>(); | ||
// AssignmentTrackingAnalysis only runs if assignment tracking is enabled for | ||
// the module. | ||
AU.addRequired<AssignmentTrackingAnalysis>(); | ||
AU.addPreserved<AssignmentTrackingAnalysis>(); | ||
if (OptLevel != CodeGenOptLevel::None) | ||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); | ||
if (RegisterPGOPasses) | ||
LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); | ||
MachineFunctionPass::getAnalysisUsage(AU); | ||
} | ||
|
||
|
@@ -459,6 +472,7 @@ void SelectionDAGISel::initializeAnalysisResults( | |
(void)MatchFilterFuncName; | ||
#endif | ||
|
||
bool RegisterPGOPasses = shouldRegisterPGOPasses(TM, OptLevel); | ||
TII = MF->getSubtarget().getInstrInfo(); | ||
TLI = MF->getSubtarget().getTargetLowering(); | ||
RegInfo = &MF->getRegInfo(); | ||
|
@@ -469,7 +483,7 @@ void SelectionDAGISel::initializeAnalysisResults( | |
auto *PSI = MAMP.getCachedResult<ProfileSummaryAnalysis>(*Fn.getParent()); | ||
BlockFrequencyInfo *BFI = nullptr; | ||
FAM.getResult<BlockFrequencyAnalysis>(Fn); | ||
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) | ||
if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) | ||
BFI = &FAM.getResult<BlockFrequencyAnalysis>(Fn); | ||
|
||
FunctionVarLocs const *FnVarLocs = nullptr; | ||
|
@@ -487,7 +501,7 @@ void SelectionDAGISel::initializeAnalysisResults( | |
// into account). That's unfortunate but OK because it just means we won't | ||
// ask for passes that have been required anyway. | ||
|
||
if (UseMBPI && OptLevel != CodeGenOptLevel::None) | ||
if (UseMBPI && RegisterPGOPasses) | ||
FuncInfo->BPI = &FAM.getResult<BranchProbabilityAnalysis>(Fn); | ||
else | ||
FuncInfo->BPI = nullptr; | ||
|
@@ -513,6 +527,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { | |
(void)MatchFilterFuncName; | ||
#endif | ||
|
||
bool RegisterPGOPasses = shouldRegisterPGOPasses(TM, OptLevel); | ||
TII = MF->getSubtarget().getInstrInfo(); | ||
TLI = MF->getSubtarget().getTargetLowering(); | ||
RegInfo = &MF->getRegInfo(); | ||
|
@@ -523,7 +538,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { | |
AC = &MFP.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(Fn); | ||
auto *PSI = &MFP.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); | ||
BlockFrequencyInfo *BFI = nullptr; | ||
if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) | ||
if (PSI && PSI->hasProfileSummary() && RegisterPGOPasses) | ||
BFI = &MFP.getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); | ||
|
||
FunctionVarLocs const *FnVarLocs = nullptr; | ||
|
@@ -544,7 +559,7 @@ void SelectionDAGISel::initializeAnalysisResults(MachineFunctionPass &MFP) { | |
// into account). That's unfortunate but OK because it just means we won't | ||
// ask for passes that have been required anyway. | ||
|
||
if (UseMBPI && OptLevel != CodeGenOptLevel::None) | ||
if (UseMBPI && RegisterPGOPasses) | ||
FuncInfo->BPI = | ||
&MFP.getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); | ||
else | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-pass=Structure %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=PASSES | ||
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -debug-only=branch-prob %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=BRANCH_PROB | ||
; RUN: llc -mtriple=x86_64-- -O0 -pgo-kind=pgo-sample-use-pipeline -stop-after=finalize-isel %s -o - | FileCheck %s --check-prefix=MIR | ||
|
||
; REQUIRES: asserts | ||
|
||
; This test verifies that PGO profile information (branch weights) is preserved | ||
; during instruction selection at -O0. | ||
|
||
; Test function with explicit branch weights from PGO. | ||
define i32 @test_pgo_preservation(i32 %x) !prof !15 { | ||
entry: | ||
%cmp = icmp sgt i32 %x, 10 | ||
; This branch has bias: 97 taken vs 3 not taken | ||
br i1 %cmp, label %if.then, label %if.else, !prof !16 | ||
|
||
if.then: | ||
; Hot path - should have high frequency | ||
%add = add nsw i32 %x, 100 | ||
br label %if.end | ||
|
||
if.else: | ||
; Cold path - should have low frequency | ||
%sub = sub nsw i32 %x, 50 | ||
br label %if.end | ||
|
||
if.end: | ||
%result = phi i32 [ %add, %if.then ], [ %sub, %if.else ] | ||
ret i32 %result | ||
} | ||
|
||
; Profile metadata with branch weights 97:3. | ||
!15 = !{!"function_entry_count", i64 100} | ||
!16 = !{!"branch_weights", i32 97, i32 3} | ||
|
||
; Verify that Branch Probability Analysis runs at O0. | ||
; PASSES: Branch Probability Analysis | ||
|
||
; Verify that the branch probabilities reflect the exact profile data. | ||
; BRANCH_PROB: ---- Branch Probability Info : test_pgo_preservation ---- | ||
; BRANCH_PROB: set edge entry -> 0 successor probability to {{.*}} = 97.00% | ||
; BRANCH_PROB: set edge entry -> 1 successor probability to {{.*}} = 3.00% | ||
|
||
; Verify that machine IR preserves the branch probabilities from profile data | ||
; MIR: bb.0.entry: | ||
; MIR-NEXT: successors: %bb.{{[0-9]+}}({{0x03d70a3d|0x7c28f5c3}}), %bb.{{[0-9]+}}({{0x7c28f5c3|0x03d70a3d}}) | ||
; The two successor probability values should be: | ||
; - 0x7c28f5c3: approximately 97% (high probability successor) | ||
; - 0x03d70a3d: approximately 3% (low probability successor) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,6 +44,7 @@ | |
#include "llvm/Support/FileSystem.h" | ||
#include "llvm/Support/FormattedStream.h" | ||
#include "llvm/Support/InitLLVM.h" | ||
#include "llvm/Support/PGOOptions.h" | ||
#include "llvm/Support/PluginLoader.h" | ||
#include "llvm/Support/SourceMgr.h" | ||
#include "llvm/Support/TargetSelect.h" | ||
|
@@ -239,6 +240,145 @@ static cl::opt<RunPassOption, true, cl::parser<std::string>> RunPass( | |
cl::desc("Run compiler only for specified passes (comma separated list)"), | ||
cl::value_desc("pass-name"), cl::location(RunPassOpt)); | ||
|
||
// PGO command line options | ||
enum PGOKind { | ||
NoPGO, | ||
InstrGen, | ||
InstrUse, | ||
SampleUse, | ||
}; | ||
|
||
enum CSPGOKind { | ||
NoCSPGO, | ||
CSInstrGen, | ||
CSInstrUse, | ||
}; | ||
|
||
static cl::opt<PGOKind> | ||
PGOKindFlag("pgo-kind", cl::init(NoPGO), cl::Hidden, | ||
cl::desc("The kind of profile guided optimization"), | ||
cl::values(clEnumValN(NoPGO, "nopgo", "Do not use PGO."), | ||
clEnumValN(InstrGen, "pgo-instr-gen-pipeline", | ||
"Instrument the IR to generate profile."), | ||
clEnumValN(InstrUse, "pgo-instr-use-pipeline", | ||
"Use instrumented profile to guide PGO."), | ||
clEnumValN(SampleUse, "pgo-sample-use-pipeline", | ||
"Use sampled profile to guide PGO."))); | ||
|
||
static cl::opt<std::string> | ||
ProfileFile("profile-file", cl::desc("Path to the profile."), cl::Hidden); | ||
|
||
static cl::opt<std::string> | ||
MemoryProfileFile("memory-profile-file", | ||
cl::desc("Path to the memory profile."), cl::Hidden); | ||
|
||
static cl::opt<CSPGOKind> CSPGOKindFlag( | ||
"cspgo-kind", cl::init(NoCSPGO), cl::Hidden, | ||
cl::desc("The kind of context sensitive profile guided optimization"), | ||
cl::values( | ||
clEnumValN(NoCSPGO, "nocspgo", "Do not use CSPGO."), | ||
clEnumValN( | ||
CSInstrGen, "cspgo-instr-gen-pipeline", | ||
"Instrument (context sensitive) the IR to generate profile."), | ||
clEnumValN( | ||
CSInstrUse, "cspgo-instr-use-pipeline", | ||
"Use instrumented (context sensitive) profile to guide PGO."))); | ||
|
||
static cl::opt<std::string> CSProfileGenFile( | ||
"cs-profilegen-file", | ||
cl::desc("Path to the instrumented context sensitive profile."), | ||
cl::Hidden); | ||
|
||
static cl::opt<std::string> | ||
ProfileRemappingFile("profile-remapping-file", | ||
cl::desc("Path to the profile remapping file."), | ||
cl::Hidden); | ||
|
||
static cl::opt<PGOOptions::ColdFuncOpt> PGOColdFuncAttr( | ||
"pgo-cold-func-opt", cl::init(PGOOptions::ColdFuncOpt::Default), cl::Hidden, | ||
cl::desc( | ||
"Function attribute to apply to cold functions as determined by PGO"), | ||
cl::values(clEnumValN(PGOOptions::ColdFuncOpt::Default, "default", | ||
"Default (no attribute)"), | ||
clEnumValN(PGOOptions::ColdFuncOpt::OptSize, "optsize", | ||
"Mark cold functions with optsize."), | ||
clEnumValN(PGOOptions::ColdFuncOpt::MinSize, "minsize", | ||
"Mark cold functions with minsize."), | ||
clEnumValN(PGOOptions::ColdFuncOpt::OptNone, "optnone", | ||
"Mark cold functions with optnone."))); | ||
|
||
static cl::opt<bool> DebugInfoForProfiling( | ||
"debug-info-for-profiling", cl::init(false), cl::Hidden, | ||
cl::desc("Emit special debug info to enable PGO profile generation.")); | ||
|
||
static cl::opt<bool> PseudoProbeForProfiling( | ||
"pseudo-probe-for-profiling", cl::init(false), cl::Hidden, | ||
cl::desc("Emit pseudo probes to enable PGO profile generation.")); | ||
|
||
// Function to set PGO options on TargetMachine based on command line flags | ||
static void setPGOOptions(TargetMachine &TM) { | ||
std::optional<PGOOptions> PGOOpt; | ||
|
||
switch (PGOKindFlag) { | ||
case InstrGen: | ||
PGOOpt = | ||
PGOOptions(ProfileFile, "", "", MemoryProfileFile, PGOOptions::IRInstr, | ||
PGOOptions::NoCSAction, PGOColdFuncAttr); | ||
break; | ||
case InstrUse: | ||
PGOOpt = | ||
PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, | ||
PGOOptions::IRUse, PGOOptions::NoCSAction, PGOColdFuncAttr); | ||
break; | ||
case SampleUse: | ||
PGOOpt = PGOOptions(ProfileFile, "", ProfileRemappingFile, | ||
MemoryProfileFile, PGOOptions::SampleUse, | ||
PGOOptions::NoCSAction, PGOColdFuncAttr); | ||
break; | ||
case NoPGO: | ||
if (DebugInfoForProfiling || PseudoProbeForProfiling || | ||
!MemoryProfileFile.empty()) | ||
PGOOpt = PGOOptions("", "", "", MemoryProfileFile, PGOOptions::NoAction, | ||
PGOOptions::NoCSAction, PGOColdFuncAttr, | ||
DebugInfoForProfiling, PseudoProbeForProfiling); | ||
else | ||
PGOOpt = std::nullopt; | ||
break; | ||
} | ||
|
||
// Handle context-sensitive PGO options | ||
if (CSPGOKindFlag != NoCSPGO) { | ||
if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr || | ||
PGOOpt->Action == PGOOptions::SampleUse)) { | ||
errs() << "CSPGOKind cannot be used with IRInstr or SampleUse"; | ||
exit(1); | ||
} | ||
if (CSPGOKindFlag == CSInstrGen) { | ||
if (CSProfileGenFile.empty()) { | ||
errs() << "CSInstrGen needs to specify CSProfileGenFile"; | ||
exit(1); | ||
} | ||
if (PGOOpt) { | ||
PGOOpt->CSAction = PGOOptions::CSIRInstr; | ||
PGOOpt->CSProfileGenFile = CSProfileGenFile; | ||
} else { | ||
PGOOpt = PGOOptions("", CSProfileGenFile, ProfileRemappingFile, | ||
/*MemoryProfile=*/"", PGOOptions::NoAction, | ||
PGOOptions::CSIRInstr); | ||
} | ||
} else /* CSPGOKindFlag == CSInstrUse */ { | ||
if (!PGOOpt) { | ||
errs() << "CSInstrUse needs to be together with InstrUse"; | ||
exit(1); | ||
} | ||
PGOOpt->CSAction = PGOOptions::CSIRUse; | ||
} | ||
} | ||
|
||
if (PGOOpt) | ||
TM.setPGOOption(PGOOpt); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If the changes here are just to cover the case of directly invoke llc on an IR module with profile information (like the test does), I think we can use a simple flag instead of creating a PGOOption that acts like a boolean switch. In most scenarios, we'd feed cpp source and profile to clang driver, which would set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 on a simple flag, would it make sense to still use we can pass any of the PGO option.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I thought it might be useful in the future. There is no way to inject PGOOption into llc. However, there might be logic relying on those parameters in CodeGen in the future that would be easier to test. Maybe I can keep just pgo-kind cmd parameter for now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with the others. Adding all of this just for a possible future use case seems unreasonable, we can always add more if necessary. Also +1 to using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I kept only pgo-kind option with the only value pgo-sample-use-pipeline. |
||
} | ||
|
||
static int compileModule(char **, LLVMContext &); | ||
|
||
[[noreturn]] static void reportError(Twine Msg, StringRef Filename = "") { | ||
|
@@ -554,6 +694,9 @@ static int compileModule(char **argv, LLVMContext &Context) { | |
TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl)); | ||
assert(Target && "Could not allocate target machine!"); | ||
|
||
// Set PGO options based on command line flags | ||
setPGOOptions(*Target); | ||
|
||
return Target->createDataLayout().getStringRepresentation(); | ||
}; | ||
if (InputLanguage == "mir" || | ||
|
@@ -597,6 +740,9 @@ static int compileModule(char **argv, LLVMContext &Context) { | |
TheTriple, CPUStr, FeaturesStr, Options, RM, CM, OLvl)); | ||
assert(Target && "Could not allocate target machine!"); | ||
|
||
// Set PGO options based on command line flags | ||
setPGOOptions(*Target); | ||
|
||
// If we don't have a module then just exit now. We do this down | ||
// here since the CPU/Feature help is underneath the target machine | ||
// creation. | ||
|
Uh oh!
There was an error while loading. Please reload this page.