@@ -521,6 +521,8 @@ struct Formula {
521521
522522 bool hasZeroEnd () const ;
523523
524+ bool countsDownToZero () const ;
525+
524526 size_t getNumRegs () const ;
525527 Type *getType () const ;
526528
@@ -705,6 +707,16 @@ bool Formula::hasZeroEnd() const {
705707 return true ;
706708}
707709
/// Report whether this formula describes a counter that steps down to zero.
/// Returns false unless hasZeroEnd() holds. Otherwise the single base register
/// must match m_scev_AffineAddRec with a step captured by m_scev_APInt, and
/// the result is whether that captured step is negative.
710+ bool Formula::countsDownToZero () const {
// Without a zero end there is nothing to count down to.
711+ if (!hasZeroEnd ())
712+ return false ;
// The match below only inspects BaseRegs[0], so exactly one base register is
// required at this point.
713+ assert (BaseRegs.size () == 1 && " hasZeroEnd should mean one BaseReg" );
// NOTE(review): StepInt is left uninitialized here; it is presumably bound by
// m_scev_APInt when the match succeeds, and is only read on that path.
714+ const APInt *StepInt;
// Reject base registers that do not match the add-recurrence pattern.
// NOTE(review): m_SCEV() appears to accept any start operand; confirm.
715+ if (!match (BaseRegs[0 ], m_scev_AffineAddRec (m_SCEV (), m_scev_APInt (StepInt))))
716+ return false ;
// A negative step is what this function treats as "counting down".
717+ return StepInt->isNegative ();
718+ }
719+
708720// / Return the total number of register operands used by this formula. This does
709721// / not include register uses implied by non-constant addrec strides.
710722size_t Formula::getNumRegs () const {
@@ -1227,20 +1239,21 @@ class Cost {
12271239 return C.NumRegs == ~0u ;
12281240 }
12291241
1230- void RateFormula (const Formula &F,
1231- SmallPtrSetImpl<const SCEV *> &Regs,
1232- const DenseSet<const SCEV *> &VisitedRegs,
1233- const LSRUse &LU,
1242+ void RateFormula (const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1243+ const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
1244+ bool HardwareLoopProfitable,
12341245 SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr );
12351246
12361247 void print (raw_ostream &OS) const ;
12371248 void dump () const ;
12381249
12391250private:
12401251 void RateRegister (const Formula &F, const SCEV *Reg,
1241- SmallPtrSetImpl<const SCEV *> &Regs);
1252+ SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1253+ bool HardwareLoopProfitable);
12421254 void RatePrimaryRegister (const Formula &F, const SCEV *Reg,
12431255 SmallPtrSetImpl<const SCEV *> &Regs,
1256+ const LSRUse &LU, bool HardwareLoopProfitable,
12441257 SmallPtrSetImpl<const SCEV *> *LoserRegs);
12451258};
12461259
@@ -1383,7 +1396,8 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
13831396
13841397// / Tally up interesting quantities from the given register.
13851398void Cost::RateRegister (const Formula &F, const SCEV *Reg,
1386- SmallPtrSetImpl<const SCEV *> &Regs) {
1399+ SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1400+ bool HardwareLoopProfitable) {
13871401 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
13881402 // If this is an addrec for another loop, it should be an invariant
13891403 // with respect to L since L is the innermost loop (at least
@@ -1419,13 +1433,18 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14191433 SE->isLoopInvariant (Start, L)))
14201434 LoopCost = 0 ;
14211435 }
1436+ // If the loop counts down to zero and we'll be using a hardware loop then
1437+ // the addrec will be combined into the hardware loop instruction.
1438+ if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero () &&
1439+ HardwareLoopProfitable)
1440+ LoopCost = 0 ;
14221441 C.AddRecCost += LoopCost;
14231442
14241443 // Add the step value register, if it needs one.
14251444 // TODO: The non-affine case isn't precisely modeled here.
14261445 if (!AR->isAffine () || !isa<SCEVConstant>(AR->getOperand (1 ))) {
14271446 if (!Regs.count (AR->getOperand (1 ))) {
1428- RateRegister (F, AR->getOperand (1 ), Regs);
1447+ RateRegister (F, AR->getOperand (1 ), Regs, LU, HardwareLoopProfitable );
14291448 if (isLoser ())
14301449 return ;
14311450 }
@@ -1448,22 +1467,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14481467// / one of those regs an instant loser.
14491468void Cost::RatePrimaryRegister (const Formula &F, const SCEV *Reg,
14501469 SmallPtrSetImpl<const SCEV *> &Regs,
1470+ const LSRUse &LU, bool HardwareLoopProfitable,
14511471 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
// LoserRegs is optional (may be null). A register already recorded in it
// makes this cost lose immediately without rating anything.
14521472 if (LoserRegs && LoserRegs->count (Reg)) {
14531473 Lose ();
14541474 return ;
14551475 }
// Rate the register only when it is newly inserted into Regs; a register
// that is already present is not rated again.
14561476 if (Regs.insert (Reg).second ) {
// LU and HardwareLoopProfitable are not examined here; the added call below
// forwards them unchanged to RateRegister.
1457- RateRegister (F, Reg, Regs);
1477+ RateRegister (F, Reg, Regs, LU, HardwareLoopProfitable );
// If rating this register turned the cost into a loser, record the register
// in LoserRegs so the early-out above catches it on later calls.
14581478 if (LoserRegs && isLoser ())
14591479 LoserRegs->insert (Reg);
14601480 }
14611481}
14621482
1463- void Cost::RateFormula (const Formula &F,
1464- SmallPtrSetImpl<const SCEV *> &Regs,
1483+ void Cost::RateFormula (const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
14651484 const DenseSet<const SCEV *> &VisitedRegs,
1466- const LSRUse &LU,
1485+ const LSRUse &LU, bool HardwareLoopProfitable,
14671486 SmallPtrSetImpl<const SCEV *> *LoserRegs) {
14681487 if (isLoser ())
14691488 return ;
@@ -1477,7 +1496,8 @@ void Cost::RateFormula(const Formula &F,
14771496 Lose ();
14781497 return ;
14791498 }
1480- RatePrimaryRegister (F, ScaledReg, Regs, LoserRegs);
1499+ RatePrimaryRegister (F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1500+ LoserRegs);
14811501 if (isLoser ())
14821502 return ;
14831503 }
@@ -1486,7 +1506,8 @@ void Cost::RateFormula(const Formula &F,
14861506 Lose ();
14871507 return ;
14881508 }
1489- RatePrimaryRegister (F, BaseReg, Regs, LoserRegs);
1509+ RatePrimaryRegister (F, BaseReg, Regs, LU, HardwareLoopProfitable,
1510+ LoserRegs);
14901511 if (isLoser ())
14911512 return ;
14921513 }
@@ -2112,6 +2133,7 @@ class LSRInstance {
21122133 TTI::AddressingModeKind AMK;
21132134 mutable SCEVExpander Rewriter;
21142135 bool Changed = false ;
2136+ bool HardwareLoopProfitable = false ;
21152137
21162138 // / This is the insert position that the current loop's induction variable
21172139 // / increment should be placed. In simple loops, this is the latch block's
@@ -3592,7 +3614,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
35923614 if (!VisitedLSRUse.count (LUIdx) && !LF.isUseFullyOutsideLoop (L)) {
35933615 Formula F;
35943616 F.initialMatch (S, L, SE);
3595- BaselineCost.RateFormula (F, Regs, VisitedRegs, LU);
3617+ BaselineCost.RateFormula (F, Regs, VisitedRegs, LU,
3618+ HardwareLoopProfitable);
35963619 VisitedLSRUse.insert (LUIdx);
35973620 }
35983621
@@ -4730,7 +4753,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
47304753 // the corresponding bad register from the Regs set.
47314754 Cost CostF (L, SE, TTI, AMK);
47324755 Regs.clear ();
4733- CostF.RateFormula (F, Regs, VisitedRegs, LU, &LoserRegs);
4756+ CostF.RateFormula (F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4757+ &LoserRegs);
47344758 if (CostF.isLoser ()) {
47354759 // During initial formula generation, undesirable formulae are generated
47364760 // by uses within other loops that have some non-trivial address mode or
@@ -4763,7 +4787,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
47634787
47644788 Cost CostBest (L, SE, TTI, AMK);
47654789 Regs.clear ();
4766- CostBest.RateFormula (Best, Regs, VisitedRegs, LU);
4790+ CostBest.RateFormula (Best, Regs, VisitedRegs, LU,
4791+ HardwareLoopProfitable);
47674792 if (CostF.isLess (CostBest))
47684793 std::swap (F, Best);
47694794 LLVM_DEBUG (dbgs () << " Filtering out formula " ; F.print (dbgs ());
@@ -5021,9 +5046,9 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
50215046 Cost CostFA (L, SE, TTI, AMK);
50225047 Cost CostFB (L, SE, TTI, AMK);
50235048 Regs.clear ();
5024- CostFA.RateFormula (FA, Regs, VisitedRegs, LU);
5049+ CostFA.RateFormula (FA, Regs, VisitedRegs, LU, HardwareLoopProfitable );
50255050 Regs.clear ();
5026- CostFB.RateFormula (FB, Regs, VisitedRegs, LU);
5051+ CostFB.RateFormula (FB, Regs, VisitedRegs, LU, HardwareLoopProfitable );
50275052 return CostFA.isLess (CostFB);
50285053 };
50295054
@@ -5428,7 +5453,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
54285453 // the current best, prune the search at that point.
54295454 NewCost = CurCost;
54305455 NewRegs = CurRegs;
5431- NewCost.RateFormula (F, NewRegs, VisitedRegs, LU);
5456+ NewCost.RateFormula (F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable );
54325457 if (NewCost.isLess (SolutionCost)) {
54335458 Workspace.push_back (&F);
54345459 if (Workspace.size () != Uses.size ()) {
@@ -6133,6 +6158,12 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
61336158 L->getHeader ()->printAsOperand (dbgs (), /* PrintType=*/ false );
61346159 dbgs () << " :\n " );
61356160
6161+ // Check if we expect this loop to use a hardware loop instruction, which will
6162+ // be used when calculating the costs of formulas.
6163+ HardwareLoopInfo HWLoopInfo (L);
6164+ HardwareLoopProfitable =
6165+ TTI.isHardwareLoopProfitable (L, SE, AC, &TLI, HWLoopInfo);
6166+
61366167 // Configure SCEVExpander already now, so the correct mode is used for
61376168 // isSafeToExpand() checks.
61386169#if LLVM_ENABLE_ABI_BREAKING_CHECKS
0 commit comments