@@ -2682,6 +2682,33 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
2682
2682
return I->second ;
2683
2683
}
2684
2684
2685
+ // / Knowing that loop \p L would be fully unrolled after vectorisation, add
2686
+ // / instructions that will get simplified and thus should not have any cost to
2687
+ // / \p InstsToIgnore
2688
+ static void AddFullyUnrolledInstructionsToIgnore (
2689
+ Loop *L, const LoopVectorizationLegality::InductionList &IL,
2690
+ SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
2691
+ auto *Cmp = L->getLatchCmpInst ();
2692
+ if (!Cmp)
2693
+ return ;
2694
+ InstsToIgnore.insert (Cmp);
2695
+ for (const auto &[IV, IndDesc] : IL) {
2696
+ // Get next iteration value of the induction variable
2697
+ Instruction *IVInst =
2698
+ cast<Instruction>(IV->getIncomingValueForBlock (L->getLoopLatch ()));
2699
+ bool IsSimplifiedAway = true ;
2700
+ // Check that this value used only to exit the loop
2701
+ for (auto *UIV : IVInst->users ()) {
2702
+ if (UIV != IV && UIV != Cmp) {
2703
+ IsSimplifiedAway = false ;
2704
+ break ;
2705
+ }
2706
+ }
2707
+ if (IsSimplifiedAway)
2708
+ InstsToIgnore.insert (IVInst);
2709
+ }
2710
+ }
2711
+
2685
2712
void InnerLoopVectorizer::createInductionResumeVPValues (
2686
2713
const SCEV2ValueTy &ExpandedSCEVs, Value *MainVectorTripCount,
2687
2714
SmallPtrSetImpl<PHINode *> *IVSubset) {
@@ -5592,19 +5619,13 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5592
5619
InstructionCost LoopVectorizationCostModel::expectedCost (ElementCount VF) {
5593
5620
InstructionCost Cost;
5594
5621
5595
- // If with the given VF loop gets fully unrolled, ignore the costs of
5596
- // comparison and induction instructions, as they'll get simplified away
5597
- SmallPtrSet<const Value *, 16 > ValuesToIgnoreForVF;
5622
+ // If with the given fixed width VF loop gets fully unrolled, ignore the costs
5623
+ // of comparison and induction instructions, as they'll get simplified away
5624
+ SmallPtrSet<Instruction *, 2 > ValuesToIgnoreForVF;
5598
5625
auto TC = PSE.getSE ()->getSmallConstantTripCount (TheLoop);
5599
- auto *Cmp = TheLoop->getLatchCmpInst ();
5600
- if (Cmp && TC == VF.getKnownMinValue ()) {
5601
- ValuesToIgnoreForVF.insert (Cmp);
5602
- for (const auto &[IV, IndDesc] : Legal->getInductionVars ()) {
5603
- Instruction *IVInc = cast<Instruction>(
5604
- IV->getIncomingValueForBlock (TheLoop->getLoopLatch ()));
5605
- ValuesToIgnoreForVF.insert (IVInc);
5606
- }
5607
- }
5626
+ if (VF.isFixed () && TC == VF.getFixedValue ())
5627
+ AddFullyUnrolledInstructionsToIgnore (TheLoop, Legal->getInductionVars (),
5628
+ ValuesToIgnoreForVF);
5608
5629
5609
5630
// For each block.
5610
5631
for (BasicBlock *BB : TheLoop->blocks ()) {
@@ -7298,16 +7319,10 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7298
7319
7299
7320
// If with the given VF loop gets fully unrolled, ignore the costs of
7300
7321
// comparison and induction instructions, as they'll get simplified away
7301
- auto TC = CM.PSE .getSE ()->getSmallConstantTripCount (OrigLoop);
7302
- auto *Cmp = OrigLoop->getLatchCmpInst ();
7303
- if (Cmp && TC == VF.getKnownMinValue ()) {
7304
- CostCtx.SkipCostComputation .insert (Cmp);
7305
- for (const auto &[IV, IndDesc] : Legal->getInductionVars ()) {
7306
- Instruction *IVInc = cast<Instruction>(
7307
- IV->getIncomingValueForBlock (OrigLoop->getLoopLatch ()));
7308
- CostCtx.SkipCostComputation .insert (IVInc);
7309
- }
7310
- }
7322
+ auto TC = PSE.getSE ()->getSmallConstantTripCount (OrigLoop);
7323
+ if (VF.isFixed () && TC == VF.getFixedValue ())
7324
+ AddFullyUnrolledInstructionsToIgnore (OrigLoop, Legal->getInductionVars (),
7325
+ CostCtx.SkipCostComputation );
7311
7326
7312
7327
for (Instruction *IVInst : IVInsts) {
7313
7328
if (CostCtx.skipCostComputation (IVInst, VF.isVector ()))
0 commit comments