@@ -80,6 +80,27 @@ static cl::opt<unsigned>
8080 ICPCSSkip (" icp-csskip" , cl::init(0 ), cl::Hidden,
8181 cl::desc(" Skip Callsite up to this number for this compilation" ));
8282
83+ // ICP the candidate function even when only a declaration is present.
84+ static cl::opt<bool > ICPAllowDecls (
85+ " icp-allow-decls" , cl::init(false ), cl::Hidden,
86+ cl::desc(" Promote the target candidate even when the defintion "
87+ " is not available" ));
88+
89+ // ICP hot candidate functions only. When setting to false, non-cold functions
90+ // (warm functions) can also be promoted.
91+ static cl::opt<bool >
92+ ICPAllowHotOnly (" icp-allow-hot-only" , cl::init(true ), cl::Hidden,
93+ cl::desc(" Promote the target candidate only if it is a "
94+ " hot function. Otherwise, warm functions can "
95+ " also be promoted" ));
96+
97+ // If one target cannot be ICP'd, proceed with the remaining targets instead
98+ // of exiting the callsite.
99+ static cl::opt<bool > ICPAllowCandidateSkip (
100+ " icp-allow-candidate-skip" , cl::init(false ), cl::Hidden,
101+ cl::desc(" Continue with the remaining targets instead of exiting "
102+ " when failing in a candidate" ));
103+
83104// Set if the pass is called in LTO optimization. The difference for LTO mode
84105// is the pass won't prefix the source module name to the internal linkage
85106// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
330351 struct PromotionCandidate {
331352 Function *const TargetFunction;
332353 const uint64_t Count;
354+ const uint32_t Index;
333355
334356 // The following fields only exists for promotion candidates with vtable
335357 // information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
341363 VTableGUIDCountsMap VTableGUIDAndCounts;
342364 SmallVector<Constant *> AddressPoints;
343365
344- PromotionCandidate (Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
366+ PromotionCandidate (Function *F, uint64_t C, uint32_t I)
367+ : TargetFunction(F), Count(C), Index(I) {}
345368 };
346369
347370 // Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
356379 // Promote a list of targets for one indirect-call callsite by comparing
357380 // indirect callee with functions. Return true if there are IR
358381 // transformations and false otherwise.
359- bool tryToPromoteWithFuncCmp (CallBase &CB, Instruction *VPtr,
360- ArrayRef<PromotionCandidate> Candidates,
361- uint64_t TotalCount,
362- ArrayRef<InstrProfValueData> ICallProfDataRef,
363- uint32_t NumCandidates,
364- VTableGUIDCountsMap &VTableGUIDCounts);
382+ bool tryToPromoteWithFuncCmp (
383+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
384+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
365386
366387 // Promote a list of targets for one indirect call by comparing vtables with
367388 // functions. Return true if there are IR transformations and false
@@ -394,12 +415,15 @@ class IndirectCallPromoter {
394415 Constant *getOrCreateVTableAddressPointVar (GlobalVariable *GV,
395416 uint64_t AddressPointOffset);
396417
397- void updateFuncValueProfiles (CallBase &CB, ArrayRef<InstrProfValueData> VDs,
418+ void updateFuncValueProfiles (CallBase &CB,
419+ MutableArrayRef<InstrProfValueData> VDs,
398420 uint64_t Sum, uint32_t MaxMDCount);
399421
400422 void updateVPtrValueProfiles (Instruction *VPtr,
401423 VTableGUIDCountsMap &VTableGUIDCounts);
402424
425+ bool isValidTarget (uint64_t , Function *, const CallBase &, uint64_t );
426+
403427public:
404428 IndirectCallPromoter (
405429 Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -419,6 +443,53 @@ class IndirectCallPromoter {
419443
420444} // end anonymous namespace
421445
446+ bool IndirectCallPromoter::isValidTarget (uint64_t Target,
447+ Function *TargetFunction,
448+ const CallBase &CB, uint64_t Count) {
449+ // Don't promote if the symbol is not defined in the module. This avoids
450+ // creating a reference to a symbol that doesn't exist in the module
451+ // This can happen when we compile with a sample profile collected from
452+ // one binary but used for another, which may have profiled targets that
453+ // aren't used in the new binary. We might have a declaration initially in
454+ // the case where the symbol is globally dead in the binary and removed by
455+ // ThinLTO.
456+ using namespace ore ;
457+ if (TargetFunction == nullptr ) {
458+ LLVM_DEBUG (dbgs () << " Not promote: Cannot find the target\n " );
459+ ORE.emit ([&]() {
460+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToFindTarget" , &CB)
461+ << " Cannot promote indirect call: target with md5sum "
462+ << NV (" target md5sum" , Target)
463+ << " not found (count=" << NV (" Count" , Count) << " )" ;
464+ });
465+ return false ;
466+ }
467+ if (!ICPAllowDecls && TargetFunction->isDeclaration ()) {
468+ LLVM_DEBUG (dbgs () << " Not promote: target definition is not available\n " );
469+ ORE.emit ([&]() {
470+ return OptimizationRemarkMissed (DEBUG_TYPE, " NoTargetDef" , &CB)
471+ << " Do not promote indirect call: target with md5sum "
472+ << NV (" target md5sum" , Target)
473+ << " definition not available (count=" << ore::NV (" Count" , Count)
474+ << " )" ;
475+ });
476+ return false ;
477+ }
478+
479+ const char *Reason = nullptr ;
480+ if (!isLegalToPromote (CB, TargetFunction, &Reason)) {
481+
482+ ORE.emit ([&]() {
483+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToPromote" , &CB)
484+ << " Cannot promote indirect call to "
485+ << NV (" TargetFunction" , TargetFunction)
486+ << " (count=" << NV (" Count" , Count) << " ): " << Reason;
487+ });
488+ return false ;
489+ }
490+ return true ;
491+ }
492+
422493// Indirect-call promotion heuristic. The direct targets are sorted based on
423494// the count. Stop at the first target that is not promoted.
424495std::vector<IndirectCallPromoter::PromotionCandidate>
@@ -469,38 +540,15 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
469540 break ;
470541 }
471542
472- // Don't promote if the symbol is not defined in the module. This avoids
473- // creating a reference to a symbol that doesn't exist in the module
474- // This can happen when we compile with a sample profile collected from
475- // one binary but used for another, which may have profiled targets that
476- // aren't used in the new binary. We might have a declaration initially in
477- // the case where the symbol is globally dead in the binary and removed by
478- // ThinLTO.
479543 Function *TargetFunction = Symtab->getFunction (Target);
480- if (TargetFunction == nullptr || TargetFunction->isDeclaration ()) {
481- LLVM_DEBUG (dbgs () << " Not promote: Cannot find the target\n " );
482- ORE.emit ([&]() {
483- return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToFindTarget" , &CB)
484- << " Cannot promote indirect call: target with md5sum "
485- << ore::NV (" target md5sum" , Target) << " not found" ;
486- });
487- break ;
488- }
489-
490- const char *Reason = nullptr ;
491- if (!isLegalToPromote (CB, TargetFunction, &Reason)) {
492- using namespace ore ;
493-
494- ORE.emit ([&]() {
495- return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToPromote" , &CB)
496- << " Cannot promote indirect call to "
497- << NV (" TargetFunction" , TargetFunction) << " with count of "
498- << NV (" Count" , Count) << " : " << Reason;
499- });
500- break ;
544+ if (!isValidTarget (Target, TargetFunction, CB, Count)) {
545+ if (ICPAllowCandidateSkip)
546+ continue ;
547+ else
548+ break ;
501549 }
502550
503- Ret.push_back (PromotionCandidate (TargetFunction, Count));
551+ Ret.push_back (PromotionCandidate (TargetFunction, Count, I ));
504552 TotalCount -= Count;
505553 }
506554 return Ret;
@@ -642,7 +690,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
642690// Promote indirect-call to conditional direct-call for one callsite.
643691bool IndirectCallPromoter::tryToPromoteWithFuncCmp (
644692 CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
645- uint64_t TotalCount, ArrayRef <InstrProfValueData> ICallProfDataRef,
693+ uint64_t TotalCount, MutableArrayRef <InstrProfValueData> ICallProfDataRef,
646694 uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647695 uint32_t NumPromoted = 0 ;
648696
@@ -655,6 +703,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
655703 NumOfPGOICallPromotion++;
656704 NumPromoted++;
657705
706+ // Update the count and this entry will be erased later.
707+ ICallProfDataRef[C.Index ].Count = 0 ;
658708 if (!EnableVTableProfileUse || C.VTableGUIDAndCounts .empty ())
659709 continue ;
660710
@@ -679,21 +729,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
679729 " Number of promoted functions should not be greater than the number "
680730 " of values in profile metadata" );
681731
682- // Update value profiles on the indirect call.
683- updateFuncValueProfiles (CB, ICallProfDataRef.slice (NumPromoted), TotalCount,
684- NumCandidates);
732+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalCount, NumCandidates);
685733 updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
686734 return true ;
687735}
688736
689737void IndirectCallPromoter::updateFuncValueProfiles (
690- CallBase &CB, ArrayRef <InstrProfValueData> CallVDs, uint64_t TotalCount ,
691- uint32_t MaxMDCount) {
738+ CallBase &CB, MutableArrayRef <InstrProfValueData> CallVDs,
739+ uint64_t TotalCount, uint32_t MaxMDCount) {
692740 // First clear the existing !prof.
693741 CB.setMetadata (LLVMContext::MD_prof, nullptr );
742+
743+ // Sort value profiles by count in descending order.
744+ llvm::stable_sort (CallVDs, [](const InstrProfValueData &LHS,
745+ const InstrProfValueData &RHS) {
746+ return LHS.Count > RHS.Count ;
747+ });
748+ // Drop the <target-value, count> pair if count is zero.
749+ ArrayRef<InstrProfValueData> VDs (
750+ CallVDs.begin (),
751+ llvm::upper_bound (CallVDs, 0U ,
752+ [](uint64_t Count, const InstrProfValueData &ProfData) {
753+ return ProfData.Count <= Count;
754+ }));
755+
694756 // Annotate the remaining value profiles if counter is not zero.
695757 if (TotalCount != 0 )
696- annotateValueSite (M, CB, CallVDs , TotalCount, IPVK_IndirectCallTarget,
758+ annotateValueSite (M, CB, VDs , TotalCount, IPVK_IndirectCallTarget,
697759 MaxMDCount);
698760}
699761
@@ -726,7 +788,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726788 uint64_t TotalFuncCount, uint32_t NumCandidates,
727789 MutableArrayRef<InstrProfValueData> ICallProfDataRef,
728790 VTableGUIDCountsMap &VTableGUIDCounts) {
729- SmallVector<uint64_t , 4 > PromotedFuncCount;
791+ SmallVector<std::pair< uint32_t , uint64_t > , 4 > PromotedFuncCount;
730792
731793 for (const auto &Candidate : Candidates) {
732794 for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts )
@@ -771,7 +833,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
771833 return Remark;
772834 });
773835
774- PromotedFuncCount.push_back (Candidate.Count );
836+ PromotedFuncCount.push_back ({ Candidate.Index , Candidate. Count } );
775837
776838 assert (TotalFuncCount >= Candidate.Count &&
777839 " Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +854,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
792854 // used to load multiple virtual functions. The vtable profiles needs to be
793855 // updated properly in that case (e.g, for each indirect call annotate both
794856 // type profiles and function profiles in one !prof).
795- for (size_t I = 0 ; I < PromotedFuncCount.size (); I++)
796- ICallProfDataRef[I].Count -=
797- std::max (PromotedFuncCount[I], ICallProfDataRef[I].Count );
798- // Sort value profiles by count in descending order.
799- llvm::stable_sort (ICallProfDataRef, [](const InstrProfValueData &LHS,
800- const InstrProfValueData &RHS) {
801- return LHS.Count > RHS.Count ;
802- });
803- // Drop the <target-value, count> pair if count is zero.
804- ArrayRef<InstrProfValueData> VDs (
805- ICallProfDataRef.begin (),
806- llvm::upper_bound (ICallProfDataRef, 0U ,
807- [](uint64_t Count, const InstrProfValueData &ProfData) {
808- return ProfData.Count <= Count;
809- }));
810- updateFuncValueProfiles (CB, VDs, TotalFuncCount, NumCandidates);
857+ for (size_t I = 0 ; I < PromotedFuncCount.size (); I++) {
858+ uint32_t Index = PromotedFuncCount[I].first ;
859+ ICallProfDataRef[Index].Count -=
860+ std::max (PromotedFuncCount[I].second , ICallProfDataRef[Index].Count );
861+ }
862+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
811863 updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
812864 return true ;
813865}
@@ -822,9 +874,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822874 uint64_t TotalCount;
823875 auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction (
824876 CB, TotalCount, NumCandidates);
825- if (!NumCandidates ||
826- (PSI && PSI->hasProfileSummary () && !PSI->isHotCount (TotalCount)))
877+ if (!NumCandidates)
827878 continue ;
879+ if (PSI && PSI->hasProfileSummary ()) {
880+ // Don't promote cold candidates.
881+ if (PSI->isColdCount (TotalCount)) {
882+ LLVM_DEBUG (dbgs () << " Don't promote the cold candidate: TotalCount="
883+ << TotalCount << " \n " );
884+ continue ;
885+ }
886+ // Only pormote hot if ICPAllowHotOnly is true.
887+ if (ICPAllowHotOnly && !PSI->isHotCount (TotalCount)) {
888+ LLVM_DEBUG (dbgs () << " Don't promote the non-hot candidate: TotalCount="
889+ << TotalCount << " \n " );
890+ continue ;
891+ }
892+ }
828893
829894 auto PromotionCandidates = getPromotionCandidatesForCallSite (
830895 *CB, ICallProfDataRef, TotalCount, NumCandidates);
0 commit comments