@@ -578,8 +578,10 @@ class InnerLoopVectorizer {
578578 /// The profitability analysis.
579579 LoopVectorizationCostModel *Cost;
580580
581- // / BFI and PSI are used to check for profile guided size optimizations.
581+ // / Used to calculate the probability of predicated blocks in
582+ // / getPredBlockCostDivisor.
582583 BlockFrequencyInfo *BFI;
584+ // / Used to check for profile guided size optimizations.
583585 ProfileSummaryInfo *PSI;
584586
585587 // / Structure to hold information about generated runtime checks, responsible
@@ -900,7 +902,7 @@ class LoopVectorizationCostModel {
900902 InterleavedAccessInfo &IAI,
901903 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
902904 : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
903- TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
905+ TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), BFI(BFI), TheFunction(F),
904906 Hints(Hints), InterleaveInfo(IAI) {
905907 if (TTI.supportsScalableVectors () || ForceTargetSupportsScalableVectors)
906908 initializeVScaleForTuning ();
@@ -1249,6 +1251,17 @@ class LoopVectorizationCostModel {
12491251 // / Superset of instructions that return true for isScalarWithPredication.
12501252 bool isPredicatedInst (Instruction *I) const ;
12511253
1254+ /// Returns the value by which the cost of the predicated block \p BB should
1255+ /// be divided. A result of X assumes \p BB executes once for every X
1256+ /// iterations of the loop header, so the block contributes only 1/X of its
1257+ /// cost to the total. The definition derives X from block frequencies: the
1258+ /// loop header's frequency divided by \p BB's frequency. For
1259+ /// TTI::TCK_CodeSize the result is always 1, since code size costs do not
1260+ /// depend on execution probabilities.
1261+ inline unsigned
1262+ getPredBlockCostDivisor (TargetTransformInfo::TargetCostKind CostKind,
1263+ const BasicBlock *BB) const ;
1264+
12521265 // / Return the costs for our two available strategies for lowering a
12531266 // / div/rem operation which requires speculating at least one lane.
12541267 // / First result is for scalarization (will be invalid for scalable
@@ -1711,6 +1724,8 @@ class LoopVectorizationCostModel {
17111724 // / Interface to emit optimization remarks.
17121725 OptimizationRemarkEmitter *ORE;
17131726
/// Block frequency information. getPredBlockCostDivisor reads the frequencies
/// of the loop header and of a predicated block from it.
1727+ const BlockFrequencyInfo *BFI;
1728+
17141729 const Function *TheFunction;
17151730
17161731 // / Loop Vectorize Hint.
@@ -2866,6 +2881,19 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
28662881 }
28672882}
28682883
2884+ unsigned LoopVectorizationCostModel::getPredBlockCostDivisor (
2885+ TargetTransformInfo::TargetCostKind CostKind, const BasicBlock *BB) const {
2886+ if (CostKind == TTI::TCK_CodeSize)
2887+ return 1 ;
2888+
2889+ uint64_t HeaderFreq = BFI->getBlockFreq (TheLoop->getHeader ()).getFrequency ();
2890+ uint64_t BBFreq = BFI->getBlockFreq (BB).getFrequency ();
2891+ assert (HeaderFreq >= BBFreq &&
2892+ " Header has smaller block freq than dominated BB?" );
2893+ return BFI->getBlockFreq (TheLoop->getHeader ()).getFrequency () /
2894+ BFI->getBlockFreq (BB).getFrequency ();
2895+ }
2896+
28692897std::pair<InstructionCost, InstructionCost>
28702898LoopVectorizationCostModel::getDivRemSpeculationCost (Instruction *I,
28712899 ElementCount VF) const {
@@ -2902,7 +2930,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
29022930 // Scale the cost by the probability of executing the predicated blocks.
29032931 // This assumes the predicated block for each vector lane is equally
29042932 // likely.
2905- ScalarizationCost = ScalarizationCost / getPredBlockCostDivisor (CostKind);
2933+ ScalarizationCost =
2934+ ScalarizationCost / getPredBlockCostDivisor (CostKind, I->getParent ());
29062935 }
29072936
29082937 InstructionCost SafeDivisorCost = 0 ;
@@ -5035,7 +5064,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
50355064 }
50365065
50375066 // Scale the total scalar cost by block probability.
5038- ScalarCost /= getPredBlockCostDivisor (CostKind);
5067+ ScalarCost /= getPredBlockCostDivisor (CostKind, PredInst-> getParent () );
50395068
50405069 // Compute the discount. A non-negative discount means the vector version
50415070 // of the instruction costs more, and scalarizing would be beneficial.
@@ -5088,7 +5117,7 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
50885117 // cost by the probability of executing it. blockNeedsPredication from
50895118 // Legal is used so as to not include all blocks in tail folded loops.
50905119 if (VF.isScalar () && Legal->blockNeedsPredication (BB))
5091- BlockCost /= getPredBlockCostDivisor (CostKind);
5120+ BlockCost /= getPredBlockCostDivisor (CostKind, BB );
50925121
50935122 Cost += BlockCost;
50945123 }
@@ -5167,7 +5196,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
51675196 // conditional branches, but may not be executed for each vector lane. Scale
51685197 // the cost by the probability of executing the predicated block.
51695198 if (isPredicatedInst (I)) {
5170- Cost /= getPredBlockCostDivisor (CostKind);
5199+ Cost /= getPredBlockCostDivisor (CostKind, I-> getParent () );
51715200
51725201 // Add the cost of an i1 extract and a branch
51735202 auto *VecI1Ty =
@@ -6727,6 +6756,11 @@ bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const {
67276756 SkipCostComputation.contains (UI);
67286757}
67296758
/// Forwards the query for \p BB under \p CostKind to
/// CM.getPredBlockCostDivisor and returns its result unchanged.
6759+ unsigned VPCostContext::getPredBlockCostDivisor (
6760+ TargetTransformInfo::TargetCostKind CostKind, const BasicBlock *BB) const {
6761+ return CM.getPredBlockCostDivisor (CostKind, BB);
6762+ }
6763+
67306764InstructionCost
67316765LoopVectorizationPlanner::precomputeCosts (VPlan &Plan, ElementCount VF,
67326766 VPCostContext &CostCtx) const {
@@ -10310,9 +10344,7 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
1031010344
1031110345 auto &MAMProxy = AM.getResult <ModuleAnalysisManagerFunctionProxy>(F);
1031210346 PSI = MAMProxy.getCachedResult <ProfileSummaryAnalysis>(*F.getParent ());
10313- BFI = nullptr ;
10314- if (PSI && PSI->hasProfileSummary ())
10315- BFI = &AM.getResult <BlockFrequencyAnalysis>(F);
10347+ BFI = &AM.getResult <BlockFrequencyAnalysis>(F);
1031610348 LoopVectorizeResult Result = runImpl (F);
1031710349 if (!Result.MadeAnyChange )
1031810350 return PreservedAnalyses::all ();
0 commit comments