Skip to content

Commit c9a8e15

Browse files
authored
[ICP] Add a few tunings to indirect-call-promotion (#149892)
[ICP] Add a few tunings to indirect-call-promtion Indirect-call promotion (ICP) has been adjusted with the following tunings: (1) Candidate functions can be now ICP'd even if only a declaration is present. (2) All non-cold candidate functions are now considered by ICP. Previously, only hot targets were considered. (3) If one target cannot be ICP'd, proceed with the remaining targets instead of exiting the callsite. This update hides all tunings under internal options and disables them by default. They'll be enabled in a later update. There'll also be another update to address the "not found" issue with indirect targets.
1 parent 36a19c5 commit c9a8e15

File tree

5 files changed

+299
-69
lines changed

5 files changed

+299
-69
lines changed

llvm/lib/Analysis/ProfileSummaryInfo.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,18 @@ void ProfileSummaryInfo::computeThresholds() {
121121
ProfileSummaryBuilder::getHotCountThreshold(DetailedSummary);
122122
ColdCountThreshold =
123123
ProfileSummaryBuilder::getColdCountThreshold(DetailedSummary);
124-
assert(ColdCountThreshold <= HotCountThreshold &&
125-
"Cold count threshold cannot exceed hot count threshold!");
124+
// When the hot and cold thresholds are identical, we would classify
125+
// a count value as both hot and cold since we are doing an inclusive check
126+
// (see ::is{Hot|Cold}Count(). To avoid this undesirable overlap, ensure the
127+
// thresholds are distinct.
128+
if (HotCountThreshold == ColdCountThreshold) {
129+
if (ColdCountThreshold > 0)
130+
(*ColdCountThreshold)--;
131+
else
132+
(*HotCountThreshold)++;
133+
}
134+
assert(ColdCountThreshold < HotCountThreshold &&
135+
"Cold count threshold should be less than hot count threshold!");
126136
if (!hasPartialSampleProfile() || !ScalePartialSampleProfileWorkingSetSize) {
127137
HasHugeWorkingSetSize =
128138
HotEntry.NumCounts > ProfileSummaryHugeWorkingSetSizeThreshold;

llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp

Lines changed: 129 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,27 @@ static cl::opt<unsigned>
8080
ICPCSSkip("icp-csskip", cl::init(0), cl::Hidden,
8181
cl::desc("Skip Callsite up to this number for this compilation"));
8282

83+
// ICP the candidate function even when only a declaration is present.
84+
static cl::opt<bool> ICPAllowDecls(
85+
"icp-allow-decls", cl::init(false), cl::Hidden,
86+
cl::desc("Promote the target candidate even when the defintion "
87+
" is not available"));
88+
89+
// ICP hot candidate functions only. When setting to false, non-cold functions
90+
// (warm functions) can also be promoted.
91+
static cl::opt<bool>
92+
ICPAllowHotOnly("icp-allow-hot-only", cl::init(true), cl::Hidden,
93+
cl::desc("Promote the target candidate only if it is a "
94+
"hot function. Otherwise, warm functions can "
95+
"also be promoted"));
96+
97+
// If one target cannot be ICP'd, proceed with the remaining targets instead
98+
// of exiting the callsite.
99+
static cl::opt<bool> ICPAllowCandidateSkip(
100+
"icp-allow-candidate-skip", cl::init(false), cl::Hidden,
101+
cl::desc("Continue with the remaining targets instead of exiting "
102+
"when failing in a candidate"));
103+
83104
// Set if the pass is called in LTO optimization. The difference for LTO mode
84105
// is the pass won't prefix the source module name to the internal linkage
85106
// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
330351
struct PromotionCandidate {
331352
Function *const TargetFunction;
332353
const uint64_t Count;
354+
const uint32_t Index;
333355

334356
// The following fields only exists for promotion candidates with vtable
335357
// information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
341363
VTableGUIDCountsMap VTableGUIDAndCounts;
342364
SmallVector<Constant *> AddressPoints;
343365

344-
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
366+
PromotionCandidate(Function *F, uint64_t C, uint32_t I)
367+
: TargetFunction(F), Count(C), Index(I) {}
345368
};
346369

347370
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
356379
// Promote a list of targets for one indirect-call callsite by comparing
357380
// indirect callee with functions. Return true if there are IR
358381
// transformations and false otherwise.
359-
bool tryToPromoteWithFuncCmp(CallBase &CB, Instruction *VPtr,
360-
ArrayRef<PromotionCandidate> Candidates,
361-
uint64_t TotalCount,
362-
ArrayRef<InstrProfValueData> ICallProfDataRef,
363-
uint32_t NumCandidates,
364-
VTableGUIDCountsMap &VTableGUIDCounts);
382+
bool tryToPromoteWithFuncCmp(
383+
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
384+
uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385+
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
365386

366387
// Promote a list of targets for one indirect call by comparing vtables with
367388
// functions. Return true if there are IR transformations and false
@@ -394,12 +415,15 @@ class IndirectCallPromoter {
394415
Constant *getOrCreateVTableAddressPointVar(GlobalVariable *GV,
395416
uint64_t AddressPointOffset);
396417

397-
void updateFuncValueProfiles(CallBase &CB, ArrayRef<InstrProfValueData> VDs,
418+
void updateFuncValueProfiles(CallBase &CB,
419+
MutableArrayRef<InstrProfValueData> VDs,
398420
uint64_t Sum, uint32_t MaxMDCount);
399421

400422
void updateVPtrValueProfiles(Instruction *VPtr,
401423
VTableGUIDCountsMap &VTableGUIDCounts);
402424

425+
bool isValidTarget(uint64_t, Function *, const CallBase &, uint64_t);
426+
403427
public:
404428
IndirectCallPromoter(
405429
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -419,6 +443,53 @@ class IndirectCallPromoter {
419443

420444
} // end anonymous namespace
421445

446+
bool IndirectCallPromoter::isValidTarget(uint64_t Target,
447+
Function *TargetFunction,
448+
const CallBase &CB, uint64_t Count) {
449+
// Don't promote if the symbol is not defined in the module. This avoids
450+
// creating a reference to a symbol that doesn't exist in the module
451+
// This can happen when we compile with a sample profile collected from
452+
// one binary but used for another, which may have profiled targets that
453+
// aren't used in the new binary. We might have a declaration initially in
454+
// the case where the symbol is globally dead in the binary and removed by
455+
// ThinLTO.
456+
using namespace ore;
457+
if (TargetFunction == nullptr) {
458+
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
459+
ORE.emit([&]() {
460+
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
461+
<< "Cannot promote indirect call: target with md5sum "
462+
<< NV("target md5sum", Target)
463+
<< " not found (count=" << NV("Count", Count) << ")";
464+
});
465+
return false;
466+
}
467+
if (!ICPAllowDecls && TargetFunction->isDeclaration()) {
468+
LLVM_DEBUG(dbgs() << " Not promote: target definition is not available\n");
469+
ORE.emit([&]() {
470+
return OptimizationRemarkMissed(DEBUG_TYPE, "NoTargetDef", &CB)
471+
<< "Do not promote indirect call: target with md5sum "
472+
<< NV("target md5sum", Target)
473+
<< " definition not available (count=" << ore::NV("Count", Count)
474+
<< ")";
475+
});
476+
return false;
477+
}
478+
479+
const char *Reason = nullptr;
480+
if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
481+
482+
ORE.emit([&]() {
483+
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
484+
<< "Cannot promote indirect call to "
485+
<< NV("TargetFunction", TargetFunction)
486+
<< " (count=" << NV("Count", Count) << "): " << Reason;
487+
});
488+
return false;
489+
}
490+
return true;
491+
}
492+
422493
// Indirect-call promotion heuristic. The direct targets are sorted based on
423494
// the count. Stop at the first target that is not promoted.
424495
std::vector<IndirectCallPromoter::PromotionCandidate>
@@ -469,38 +540,15 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
469540
break;
470541
}
471542

472-
// Don't promote if the symbol is not defined in the module. This avoids
473-
// creating a reference to a symbol that doesn't exist in the module
474-
// This can happen when we compile with a sample profile collected from
475-
// one binary but used for another, which may have profiled targets that
476-
// aren't used in the new binary. We might have a declaration initially in
477-
// the case where the symbol is globally dead in the binary and removed by
478-
// ThinLTO.
479543
Function *TargetFunction = Symtab->getFunction(Target);
480-
if (TargetFunction == nullptr || TargetFunction->isDeclaration()) {
481-
LLVM_DEBUG(dbgs() << " Not promote: Cannot find the target\n");
482-
ORE.emit([&]() {
483-
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToFindTarget", &CB)
484-
<< "Cannot promote indirect call: target with md5sum "
485-
<< ore::NV("target md5sum", Target) << " not found";
486-
});
487-
break;
488-
}
489-
490-
const char *Reason = nullptr;
491-
if (!isLegalToPromote(CB, TargetFunction, &Reason)) {
492-
using namespace ore;
493-
494-
ORE.emit([&]() {
495-
return OptimizationRemarkMissed(DEBUG_TYPE, "UnableToPromote", &CB)
496-
<< "Cannot promote indirect call to "
497-
<< NV("TargetFunction", TargetFunction) << " with count of "
498-
<< NV("Count", Count) << ": " << Reason;
499-
});
500-
break;
544+
if (!isValidTarget(Target, TargetFunction, CB, Count)) {
545+
if (ICPAllowCandidateSkip)
546+
continue;
547+
else
548+
break;
501549
}
502550

503-
Ret.push_back(PromotionCandidate(TargetFunction, Count));
551+
Ret.push_back(PromotionCandidate(TargetFunction, Count, I));
504552
TotalCount -= Count;
505553
}
506554
return Ret;
@@ -642,7 +690,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
642690
// Promote indirect-call to conditional direct-call for one callsite.
643691
bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
644692
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
645-
uint64_t TotalCount, ArrayRef<InstrProfValueData> ICallProfDataRef,
693+
uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
646694
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647695
uint32_t NumPromoted = 0;
648696

@@ -655,6 +703,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
655703
NumOfPGOICallPromotion++;
656704
NumPromoted++;
657705

706+
// Update the count and this entry will be erased later.
707+
ICallProfDataRef[C.Index].Count = 0;
658708
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts.empty())
659709
continue;
660710

@@ -679,21 +729,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
679729
"Number of promoted functions should not be greater than the number "
680730
"of values in profile metadata");
681731

682-
// Update value profiles on the indirect call.
683-
updateFuncValueProfiles(CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
684-
NumCandidates);
732+
updateFuncValueProfiles(CB, ICallProfDataRef, TotalCount, NumCandidates);
685733
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
686734
return true;
687735
}
688736

689737
void IndirectCallPromoter::updateFuncValueProfiles(
690-
CallBase &CB, ArrayRef<InstrProfValueData> CallVDs, uint64_t TotalCount,
691-
uint32_t MaxMDCount) {
738+
CallBase &CB, MutableArrayRef<InstrProfValueData> CallVDs,
739+
uint64_t TotalCount, uint32_t MaxMDCount) {
692740
// First clear the existing !prof.
693741
CB.setMetadata(LLVMContext::MD_prof, nullptr);
742+
743+
// Sort value profiles by count in descending order.
744+
llvm::stable_sort(CallVDs, [](const InstrProfValueData &LHS,
745+
const InstrProfValueData &RHS) {
746+
return LHS.Count > RHS.Count;
747+
});
748+
// Drop the <target-value, count> pair if count is zero.
749+
ArrayRef<InstrProfValueData> VDs(
750+
CallVDs.begin(),
751+
llvm::upper_bound(CallVDs, 0U,
752+
[](uint64_t Count, const InstrProfValueData &ProfData) {
753+
return ProfData.Count <= Count;
754+
}));
755+
694756
// Annotate the remaining value profiles if counter is not zero.
695757
if (TotalCount != 0)
696-
annotateValueSite(M, CB, CallVDs, TotalCount, IPVK_IndirectCallTarget,
758+
annotateValueSite(M, CB, VDs, TotalCount, IPVK_IndirectCallTarget,
697759
MaxMDCount);
698760
}
699761

@@ -726,7 +788,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726788
uint64_t TotalFuncCount, uint32_t NumCandidates,
727789
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
728790
VTableGUIDCountsMap &VTableGUIDCounts) {
729-
SmallVector<uint64_t, 4> PromotedFuncCount;
791+
SmallVector<std::pair<uint32_t, uint64_t>, 4> PromotedFuncCount;
730792

731793
for (const auto &Candidate : Candidates) {
732794
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts)
@@ -771,7 +833,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
771833
return Remark;
772834
});
773835

774-
PromotedFuncCount.push_back(Candidate.Count);
836+
PromotedFuncCount.push_back({Candidate.Index, Candidate.Count});
775837

776838
assert(TotalFuncCount >= Candidate.Count &&
777839
"Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +854,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
792854
// used to load multiple virtual functions. The vtable profiles needs to be
793855
// updated properly in that case (e.g, for each indirect call annotate both
794856
// type profiles and function profiles in one !prof).
795-
for (size_t I = 0; I < PromotedFuncCount.size(); I++)
796-
ICallProfDataRef[I].Count -=
797-
std::max(PromotedFuncCount[I], ICallProfDataRef[I].Count);
798-
// Sort value profiles by count in descending order.
799-
llvm::stable_sort(ICallProfDataRef, [](const InstrProfValueData &LHS,
800-
const InstrProfValueData &RHS) {
801-
return LHS.Count > RHS.Count;
802-
});
803-
// Drop the <target-value, count> pair if count is zero.
804-
ArrayRef<InstrProfValueData> VDs(
805-
ICallProfDataRef.begin(),
806-
llvm::upper_bound(ICallProfDataRef, 0U,
807-
[](uint64_t Count, const InstrProfValueData &ProfData) {
808-
return ProfData.Count <= Count;
809-
}));
810-
updateFuncValueProfiles(CB, VDs, TotalFuncCount, NumCandidates);
857+
for (size_t I = 0; I < PromotedFuncCount.size(); I++) {
858+
uint32_t Index = PromotedFuncCount[I].first;
859+
ICallProfDataRef[Index].Count -=
860+
std::max(PromotedFuncCount[I].second, ICallProfDataRef[Index].Count);
861+
}
862+
updateFuncValueProfiles(CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
811863
updateVPtrValueProfiles(VPtr, VTableGUIDCounts);
812864
return true;
813865
}
@@ -822,9 +874,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822874
uint64_t TotalCount;
823875
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction(
824876
CB, TotalCount, NumCandidates);
825-
if (!NumCandidates ||
826-
(PSI && PSI->hasProfileSummary() && !PSI->isHotCount(TotalCount)))
877+
if (!NumCandidates)
827878
continue;
879+
if (PSI && PSI->hasProfileSummary()) {
880+
// Don't promote cold candidates.
881+
if (PSI->isColdCount(TotalCount)) {
882+
LLVM_DEBUG(dbgs() << "Don't promote the cold candidate: TotalCount="
883+
<< TotalCount << "\n");
884+
continue;
885+
}
886+
// Only pormote hot if ICPAllowHotOnly is true.
887+
if (ICPAllowHotOnly && !PSI->isHotCount(TotalCount)) {
888+
LLVM_DEBUG(dbgs() << "Don't promote the non-hot candidate: TotalCount="
889+
<< TotalCount << "\n");
890+
continue;
891+
}
892+
}
828893

829894
auto PromotionCandidates = getPromotionCandidatesForCallSite(
830895
*CB, ICallProfDataRef, TotalCount, NumCandidates);

llvm/test/ThinLTO/X86/memprof-icp.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@
229229
; RUN: llvm-lto2 run %t/main.o %t/foo.o -enable-memprof-context-disambiguation \
230230
; RUN: -import-instr-limit=0 \
231231
; RUN: -memprof-require-definition-for-promotion \
232+
; RUN: -icp-allow-decls=false \
232233
; RUN: -enable-memprof-indirect-call-support=true \
233234
; RUN: -supports-hot-cold-new \
234235
; RUN: -r=%t/foo.o,_Z3fooR2B0j,plx \

llvm/test/Transforms/PGOProfile/icp_mismatch_msg.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S 2>& 1 | FileCheck %s
22

3-
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch
4-
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call: target with md5sum{{.*}} not found
5-
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch
3+
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func4 (count=1234): The number of arguments mismatch
4+
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call: target with md5sum {{.*}} not found (count=2345)
5+
; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func2 (count=7890): Return type mismatch
66

77
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
88
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)