@@ -80,6 +80,27 @@ static cl::opt<unsigned>
80
80
ICPCSSkip (" icp-csskip" , cl::init(0 ), cl::Hidden,
81
81
cl::desc(" Skip Callsite up to this number for this compilation" ));
82
82
83
+ // ICP the candidate function even when only a declaration is present.
84
+ static cl::opt<bool > ICPAllowDecls (
85
+ " icp-allow-decls" , cl::init(false ), cl::Hidden,
86
+ cl::desc(" Promote the target candidate even when the definition "
87
+ " is not available" ));
88
+
89
+ // ICP hot candidate functions only. When set to false, non-cold functions
90
+ // (warm functions) can also be promoted.
91
+ static cl::opt<bool >
92
+ ICPAllowHotOnly (" icp-allow-hot-only" , cl::init(true ), cl::Hidden,
93
+ cl::desc(" Promote the target candidate only if it is a "
94
+ " hot function. Otherwise, warm functions can "
95
+ " also be promoted" ));
96
+
97
+ // If one target cannot be ICP'd, proceed with the remaining targets instead
98
+ // of exiting the callsite.
99
+ static cl::opt<bool > ICPAllowCandidateSkip (
100
+ " icp-allow-candidate-skip" , cl::init(false ), cl::Hidden,
101
+ cl::desc(" Continue with the remaining targets instead of exiting "
102
+ " when failing in a candidate" ));
103
+
83
104
// Set if the pass is called in LTO optimization. The difference for LTO mode
84
105
// is the pass won't prefix the source module name to the internal linkage
85
106
// symbols.
@@ -330,6 +351,7 @@ class IndirectCallPromoter {
330
351
struct PromotionCandidate {
331
352
Function *const TargetFunction;
332
353
const uint64_t Count;
354
+ const uint32_t Index;
333
355
334
356
// The following fields only exist for promotion candidates with vtable
335
357
// information.
@@ -341,7 +363,8 @@ class IndirectCallPromoter {
341
363
VTableGUIDCountsMap VTableGUIDAndCounts;
342
364
SmallVector<Constant *> AddressPoints;
343
365
344
- PromotionCandidate (Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
366
+ PromotionCandidate (Function *F, uint64_t C, uint32_t I)
367
+ : TargetFunction(F), Count(C), Index(I) {}
345
368
};
346
369
347
370
// Check if the indirect-call call site should be promoted. Return the number
@@ -356,12 +379,10 @@ class IndirectCallPromoter {
356
379
// Promote a list of targets for one indirect-call callsite by comparing
357
380
// indirect callee with functions. Return true if there are IR
358
381
// transformations and false otherwise.
359
- bool tryToPromoteWithFuncCmp (CallBase &CB, Instruction *VPtr,
360
- ArrayRef<PromotionCandidate> Candidates,
361
- uint64_t TotalCount,
362
- ArrayRef<InstrProfValueData> ICallProfDataRef,
363
- uint32_t NumCandidates,
364
- VTableGUIDCountsMap &VTableGUIDCounts);
382
+ bool tryToPromoteWithFuncCmp (
383
+ CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
384
+ uint64_t TotalCount, MutableArrayRef<InstrProfValueData> ICallProfDataRef,
385
+ uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts);
365
386
366
387
// Promote a list of targets for one indirect call by comparing vtables with
367
388
// functions. Return true if there are IR transformations and false
@@ -394,12 +415,15 @@ class IndirectCallPromoter {
394
415
Constant *getOrCreateVTableAddressPointVar (GlobalVariable *GV,
395
416
uint64_t AddressPointOffset);
396
417
397
- void updateFuncValueProfiles (CallBase &CB, ArrayRef<InstrProfValueData> VDs,
418
+ void updateFuncValueProfiles (CallBase &CB,
419
+ MutableArrayRef<InstrProfValueData> VDs,
398
420
uint64_t Sum, uint32_t MaxMDCount);
399
421
400
422
void updateVPtrValueProfiles (Instruction *VPtr,
401
423
VTableGUIDCountsMap &VTableGUIDCounts);
402
424
425
+ bool isValidTarget (uint64_t , Function *, const CallBase &, uint64_t );
426
+
403
427
public:
404
428
IndirectCallPromoter (
405
429
Function &Func, Module &M, InstrProfSymtab *Symtab, bool SamplePGO,
@@ -419,6 +443,53 @@ class IndirectCallPromoter {
419
443
420
444
} // end anonymous namespace
421
445
446
+ bool IndirectCallPromoter::isValidTarget (uint64_t Target,
447
+ Function *TargetFunction,
448
+ const CallBase &CB, uint64_t Count) {
449
+ // Don't promote if the symbol is not defined in the module. This avoids
450
+ // creating a reference to a symbol that doesn't exist in the module.
451
+ // This can happen when we compile with a sample profile collected from
452
+ // one binary but used for another, which may have profiled targets that
453
+ // aren't used in the new binary. We might have a declaration initially in
454
+ // the case where the symbol is globally dead in the binary and removed by
455
+ // ThinLTO.
456
+ using namespace ore ;
457
+ if (TargetFunction == nullptr ) {
458
+ LLVM_DEBUG (dbgs () << " Not promote: Cannot find the target\n " );
459
+ ORE.emit ([&]() {
460
+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToFindTarget" , &CB)
461
+ << " Cannot promote indirect call: target with md5sum "
462
+ << NV (" target md5sum" , Target)
463
+ << " not found (count=" << NV (" Count" , Count) << " )" ;
464
+ });
465
+ return false ;
466
+ }
467
+ if (!ICPAllowDecls && TargetFunction->isDeclaration ()) {
468
+ LLVM_DEBUG (dbgs () << " Not promote: target definition is not available\n " );
469
+ ORE.emit ([&]() {
470
+ return OptimizationRemarkMissed (DEBUG_TYPE, " NoTargetDef" , &CB)
471
+ << " Do not promote indirect call: target with md5sum "
472
+ << NV (" target md5sum" , Target)
473
+ << " definition not available (count=" << ore::NV (" Count" , Count)
474
+ << " )" ;
475
+ });
476
+ return false ;
477
+ }
478
+
479
+ const char *Reason = nullptr ;
480
+ if (!isLegalToPromote (CB, TargetFunction, &Reason)) {
481
+
482
+ ORE.emit ([&]() {
483
+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToPromote" , &CB)
484
+ << " Cannot promote indirect call to "
485
+ << NV (" TargetFunction" , TargetFunction)
486
+ << " (count=" << NV (" Count" , Count) << " ): " << Reason;
487
+ });
488
+ return false ;
489
+ }
490
+ return true ;
491
+ }
492
+
422
493
// Indirect-call promotion heuristic. The direct targets are sorted based on
423
494
// the count. Stop at the first target that is not promoted, or skip a target that fails validation when ICPAllowCandidateSkip is set.
424
495
std::vector<IndirectCallPromoter::PromotionCandidate>
@@ -469,38 +540,15 @@ IndirectCallPromoter::getPromotionCandidatesForCallSite(
469
540
break ;
470
541
}
471
542
472
- // Don't promote if the symbol is not defined in the module. This avoids
473
- // creating a reference to a symbol that doesn't exist in the module
474
- // This can happen when we compile with a sample profile collected from
475
- // one binary but used for another, which may have profiled targets that
476
- // aren't used in the new binary. We might have a declaration initially in
477
- // the case where the symbol is globally dead in the binary and removed by
478
- // ThinLTO.
479
543
Function *TargetFunction = Symtab->getFunction (Target);
480
- if (TargetFunction == nullptr || TargetFunction->isDeclaration ()) {
481
- LLVM_DEBUG (dbgs () << " Not promote: Cannot find the target\n " );
482
- ORE.emit ([&]() {
483
- return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToFindTarget" , &CB)
484
- << " Cannot promote indirect call: target with md5sum "
485
- << ore::NV (" target md5sum" , Target) << " not found" ;
486
- });
487
- break ;
488
- }
489
-
490
- const char *Reason = nullptr ;
491
- if (!isLegalToPromote (CB, TargetFunction, &Reason)) {
492
- using namespace ore ;
493
-
494
- ORE.emit ([&]() {
495
- return OptimizationRemarkMissed (DEBUG_TYPE, " UnableToPromote" , &CB)
496
- << " Cannot promote indirect call to "
497
- << NV (" TargetFunction" , TargetFunction) << " with count of "
498
- << NV (" Count" , Count) << " : " << Reason;
499
- });
500
- break ;
544
+ if (!isValidTarget (Target, TargetFunction, CB, Count)) {
545
+ if (ICPAllowCandidateSkip)
546
+ continue ;
547
+ else
548
+ break ;
501
549
}
502
550
503
- Ret.push_back (PromotionCandidate (TargetFunction, Count));
551
+ Ret.push_back (PromotionCandidate (TargetFunction, Count, I ));
504
552
TotalCount -= Count;
505
553
}
506
554
return Ret;
@@ -642,7 +690,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
642
690
// Promote indirect-call to conditional direct-call for one callsite.
643
691
bool IndirectCallPromoter::tryToPromoteWithFuncCmp (
644
692
CallBase &CB, Instruction *VPtr, ArrayRef<PromotionCandidate> Candidates,
645
- uint64_t TotalCount, ArrayRef <InstrProfValueData> ICallProfDataRef,
693
+ uint64_t TotalCount, MutableArrayRef <InstrProfValueData> ICallProfDataRef,
646
694
uint32_t NumCandidates, VTableGUIDCountsMap &VTableGUIDCounts) {
647
695
uint32_t NumPromoted = 0 ;
648
696
@@ -655,6 +703,8 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
655
703
NumOfPGOICallPromotion++;
656
704
NumPromoted++;
657
705
706
+ // Zero the promoted target's count; zero-count entries are dropped when the value profiles are updated.
707
+ ICallProfDataRef[C.Index ].Count = 0 ;
658
708
if (!EnableVTableProfileUse || C.VTableGUIDAndCounts .empty ())
659
709
continue ;
660
710
@@ -679,21 +729,33 @@ bool IndirectCallPromoter::tryToPromoteWithFuncCmp(
679
729
" Number of promoted functions should not be greater than the number "
680
730
" of values in profile metadata" );
681
731
682
- // Update value profiles on the indirect call.
683
- updateFuncValueProfiles (CB, ICallProfDataRef.slice (NumPromoted), TotalCount,
684
- NumCandidates);
732
+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalCount, NumCandidates);
685
733
updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
686
734
return true ;
687
735
}
688
736
689
737
void IndirectCallPromoter::updateFuncValueProfiles (
690
- CallBase &CB, ArrayRef <InstrProfValueData> CallVDs, uint64_t TotalCount ,
691
- uint32_t MaxMDCount) {
738
+ CallBase &CB, MutableArrayRef <InstrProfValueData> CallVDs,
739
+ uint64_t TotalCount, uint32_t MaxMDCount) {
692
740
// First clear the existing !prof.
693
741
CB.setMetadata (LLVMContext::MD_prof, nullptr );
742
+
743
+ // Sort value profiles by count in descending order.
744
+ llvm::stable_sort (CallVDs, [](const InstrProfValueData &LHS,
745
+ const InstrProfValueData &RHS) {
746
+ return LHS.Count > RHS.Count ;
747
+ });
748
+ // Drop the <target-value, count> pair if count is zero.
749
+ ArrayRef<InstrProfValueData> VDs (
750
+ CallVDs.begin (),
751
+ llvm::upper_bound (CallVDs, 0U ,
752
+ [](uint64_t Count, const InstrProfValueData &ProfData) {
753
+ return ProfData.Count <= Count;
754
+ }));
755
+
694
756
// Annotate the remaining value profiles if counter is not zero.
695
757
if (TotalCount != 0 )
696
- annotateValueSite (M, CB, CallVDs , TotalCount, IPVK_IndirectCallTarget,
758
+ annotateValueSite (M, CB, VDs , TotalCount, IPVK_IndirectCallTarget,
697
759
MaxMDCount);
698
760
}
699
761
@@ -726,7 +788,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
726
788
uint64_t TotalFuncCount, uint32_t NumCandidates,
727
789
MutableArrayRef<InstrProfValueData> ICallProfDataRef,
728
790
VTableGUIDCountsMap &VTableGUIDCounts) {
729
- SmallVector<uint64_t , 4 > PromotedFuncCount;
791
+ SmallVector<std::pair< uint32_t , uint64_t > , 4 > PromotedFuncCount;
730
792
731
793
for (const auto &Candidate : Candidates) {
732
794
for (auto &[GUID, Count] : Candidate.VTableGUIDAndCounts )
@@ -771,7 +833,7 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
771
833
return Remark;
772
834
});
773
835
774
- PromotedFuncCount.push_back (Candidate.Count );
836
+ PromotedFuncCount.push_back ({ Candidate.Index , Candidate. Count } );
775
837
776
838
assert (TotalFuncCount >= Candidate.Count &&
777
839
" Within one prof metadata, total count is the sum of counts from "
@@ -792,22 +854,12 @@ bool IndirectCallPromoter::tryToPromoteWithVTableCmp(
792
854
// used to load multiple virtual functions. The vtable profiles needs to be
793
855
// updated properly in that case (e.g, for each indirect call annotate both
794
856
// type profiles and function profiles in one !prof).
795
- for (size_t I = 0 ; I < PromotedFuncCount.size (); I++)
796
- ICallProfDataRef[I].Count -=
797
- std::max (PromotedFuncCount[I], ICallProfDataRef[I].Count );
798
- // Sort value profiles by count in descending order.
799
- llvm::stable_sort (ICallProfDataRef, [](const InstrProfValueData &LHS,
800
- const InstrProfValueData &RHS) {
801
- return LHS.Count > RHS.Count ;
802
- });
803
- // Drop the <target-value, count> pair if count is zero.
804
- ArrayRef<InstrProfValueData> VDs (
805
- ICallProfDataRef.begin (),
806
- llvm::upper_bound (ICallProfDataRef, 0U ,
807
- [](uint64_t Count, const InstrProfValueData &ProfData) {
808
- return ProfData.Count <= Count;
809
- }));
810
- updateFuncValueProfiles (CB, VDs, TotalFuncCount, NumCandidates);
857
+ for (size_t I = 0 ; I < PromotedFuncCount.size (); I++) {
858
+ uint32_t Index = PromotedFuncCount[I].first ;
859
+ ICallProfDataRef[Index].Count -=
860
+ std::max (PromotedFuncCount[I].second , ICallProfDataRef[Index].Count );
861
+ }
862
+ updateFuncValueProfiles (CB, ICallProfDataRef, TotalFuncCount, NumCandidates);
811
863
updateVPtrValueProfiles (VPtr, VTableGUIDCounts);
812
864
return true ;
813
865
}
@@ -822,9 +874,22 @@ bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
822
874
uint64_t TotalCount;
823
875
auto ICallProfDataRef = ICallAnalysis.getPromotionCandidatesForInstruction (
824
876
CB, TotalCount, NumCandidates);
825
- if (!NumCandidates ||
826
- (PSI && PSI->hasProfileSummary () && !PSI->isHotCount (TotalCount)))
877
+ if (!NumCandidates)
827
878
continue ;
879
+ if (PSI && PSI->hasProfileSummary ()) {
880
+ // Don't promote cold candidates.
881
+ if (PSI->isColdCount (TotalCount)) {
882
+ LLVM_DEBUG (dbgs () << " Don't promote the cold candidate: TotalCount="
883
+ << TotalCount << " \n " );
884
+ continue ;
885
+ }
886
+ // Only promote hot candidates if ICPAllowHotOnly is true.
887
+ if (ICPAllowHotOnly && !PSI->isHotCount (TotalCount)) {
888
+ LLVM_DEBUG (dbgs () << " Don't promote the non-hot candidate: TotalCount="
889
+ << TotalCount << " \n " );
890
+ continue ;
891
+ }
892
+ }
828
893
829
894
auto PromotionCandidates = getPromotionCandidatesForCallSite (
830
895
*CB, ICallProfDataRef, TotalCount, NumCandidates);
0 commit comments