@@ -1259,10 +1259,12 @@ class LoopVectorizationCostModel {
12591259 ///
12601260 /// TODO: We should use actual block probability here, if available.
12611261 /// Currently, we always assume predicated blocks have a 50% chance of
1262- /// executing.
1262+ /// executing, apart from blocks that are only predicated due to tail folding.
12631263 inline unsigned
12641264 getPredBlockCostDivisor (TargetTransformInfo::TargetCostKind CostKind,
12651265 BasicBlock *BB) const {
1266+ // If a block wasn't originally predicated but was predicated due to
1267+ // e.g. tail folding, don't divide the cost.
12661268 if (!Legal->blockNeedsPredication (BB))
12671269 return 1 ;
12681270 return CostKind == TTI::TCK_CodeSize ? 1 : 2 ;
@@ -5105,9 +5107,10 @@ InstructionCost LoopVectorizationCostModel::expectedCost(ElementCount VF) {
51055107 // stores and instructions that may divide by zero) will now be
51065108 // unconditionally executed. For the scalar case, we may not always execute
51075109 // the predicated block, if it is an if-else block. Thus, scale the block's
5108- // cost by the probability of executing it. blockNeedsPredication from
5109- // Legal is used so as to not include all blocks in tail folded loops.
5110- if (VF.isScalar () && Legal->blockNeedsPredication (BB))
5110+ // cost by the probability of executing it.
5111+ // getPredBlockCostDivisor returns 1 for blocks that are only predicated due
5112+ // to tail folding, so their cost is left unscaled.
5113+ if (VF.isScalar ())
51115114 BlockCost /= getPredBlockCostDivisor (CostKind, BB);
51125115
51135116 Cost += BlockCost;
0 commit comments