@@ -521,6 +521,8 @@ struct Formula {
521
521
522
522
bool hasZeroEnd () const ;
523
523
524
+ bool countsDownToZero () const ;
525
+
524
526
size_t getNumRegs () const ;
525
527
Type *getType () const ;
526
528
@@ -705,6 +707,16 @@ bool Formula::hasZeroEnd() const {
705
707
return true ;
706
708
}
707
709
710
/// Return true only when hasZeroEnd() holds and the formula's single base
/// register matches m_scev_AffineAddRec with an APInt step that is negative.
/// Every other case (no zero end, or the base register does not match the
/// pattern) returns false.
+ bool Formula::countsDownToZero () const {
711
+ if (!hasZeroEnd ())
712
+ return false ;
713
// The code below indexes BaseRegs[0], so it relies on there being exactly one
// base register once hasZeroEnd() has returned true.
+ assert (BaseRegs.size () == 1 && " hasZeroEnd should mean one BaseReg" );
714
+ const APInt *StepInt;
715
// StepInt is left uninitialized above and only read after the match succeeds.
// NOTE(review): presumably m_scev_APInt binds StepInt on a successful match;
// confirm against the matcher's definition.
+ if (!match (BaseRegs[0 ], m_scev_AffineAddRec (m_SCEV (), m_scev_APInt (StepInt))))
716
+ return false ;
717
// A negative step is what this function treats as "counting down".
+ return StepInt->isNegative ();
718
+ }
719
+
708
720
/// Return the total number of register operands used by this formula. This does
709
721
/// not include register uses implied by non-constant addrec strides.
710
722
size_t Formula::getNumRegs () const {
@@ -1227,20 +1239,21 @@ class Cost {
1227
1239
return C.NumRegs == ~0u ;
1228
1240
}
1229
1241
1230
- void RateFormula (const Formula &F,
1231
- SmallPtrSetImpl<const SCEV *> &Regs,
1232
- const DenseSet<const SCEV *> &VisitedRegs,
1233
- const LSRUse &LU,
1242
+ void RateFormula (const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1243
+ const DenseSet<const SCEV *> &VisitedRegs, const LSRUse &LU,
1244
+ bool HardwareLoopProfitable,
1234
1245
SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr );
1235
1246
1236
1247
void print (raw_ostream &OS) const ;
1237
1248
void dump () const ;
1238
1249
1239
1250
private:
1240
1251
void RateRegister (const Formula &F, const SCEV *Reg,
1241
- SmallPtrSetImpl<const SCEV *> &Regs);
1252
+ SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1253
+ bool HardwareLoopProfitable);
1242
1254
void RatePrimaryRegister (const Formula &F, const SCEV *Reg,
1243
1255
SmallPtrSetImpl<const SCEV *> &Regs,
1256
+ const LSRUse &LU, bool HardwareLoopProfitable,
1244
1257
SmallPtrSetImpl<const SCEV *> *LoserRegs);
1245
1258
};
1246
1259
@@ -1383,7 +1396,8 @@ static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
1383
1396
1384
1397
/// Tally up interesting quantities from the given register.
1385
1398
void Cost::RateRegister (const Formula &F, const SCEV *Reg,
1386
- SmallPtrSetImpl<const SCEV *> &Regs) {
1399
+ SmallPtrSetImpl<const SCEV *> &Regs, const LSRUse &LU,
1400
+ bool HardwareLoopProfitable) {
1387
1401
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
1388
1402
// If this is an addrec for another loop, it should be an invariant
1389
1403
// with respect to L since L is the innermost loop (at least
@@ -1419,13 +1433,18 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1419
1433
SE->isLoopInvariant (Start, L)))
1420
1434
LoopCost = 0 ;
1421
1435
}
1436
+ // If the loop counts down to zero and we'll be using a hardware loop then
1437
+ // the addrec will be combined into the hardware loop instruction.
1438
+ if (LU.Kind == LSRUse::ICmpZero && F.countsDownToZero () &&
1439
+ HardwareLoopProfitable)
1440
+ LoopCost = 0 ;
1422
1441
C.AddRecCost += LoopCost;
1423
1442
1424
1443
// Add the step value register, if it needs one.
1425
1444
// TODO: The non-affine case isn't precisely modeled here.
1426
1445
if (!AR->isAffine () || !isa<SCEVConstant>(AR->getOperand (1 ))) {
1427
1446
if (!Regs.count (AR->getOperand (1 ))) {
1428
- RateRegister (F, AR->getOperand (1 ), Regs);
1447
+ RateRegister (F, AR->getOperand (1 ), Regs, LU, HardwareLoopProfitable );
1429
1448
if (isLoser ())
1430
1449
return ;
1431
1450
}
@@ -1448,22 +1467,22 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
1448
1467
/// one of those regs an instant loser.
1449
1468
// Rate Reg as a register used directly by formula F.
//  - If LoserRegs is non-null and already contains Reg, call Lose() and return
//    without rating anything.
//  - Otherwise Reg is inserted into Regs; only when that insertion is new is
//    RateRegister invoked for it.
//  - If LoserRegs is non-null and isLoser() is true after rating, Reg is added
//    to LoserRegs.
// LU and HardwareLoopProfitable are not inspected here; they are only forwarded
// to RateRegister.
void Cost::RatePrimaryRegister (const Formula &F, const SCEV *Reg,
1450
1469
SmallPtrSetImpl<const SCEV *> &Regs,
1470
+ const LSRUse &LU, bool HardwareLoopProfitable,
1451
1471
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1452
1472
// LoserRegs is optional (callers may pass nullptr), hence the null check.
if (LoserRegs && LoserRegs->count (Reg)) {
1453
1473
Lose ();
1454
1474
return ;
1455
1475
}
1456
1476
// insert(...).second is true only if Reg was not already present in Regs, so
// a register that was seen before is not rated a second time.
if (Regs.insert (Reg).second ) {
1457
- RateRegister (F, Reg, Regs);
1477
+ RateRegister (F, Reg, Regs, LU, HardwareLoopProfitable );
1458
1478
// Record Reg so the early-exit check at the top fires the next time it is
// offered with the same LoserRegs set.
if (LoserRegs && isLoser ())
1459
1479
LoserRegs->insert (Reg);
1460
1480
}
1461
1481
}
1462
1482
1463
- void Cost::RateFormula (const Formula &F,
1464
- SmallPtrSetImpl<const SCEV *> &Regs,
1483
+ void Cost::RateFormula (const Formula &F, SmallPtrSetImpl<const SCEV *> &Regs,
1465
1484
const DenseSet<const SCEV *> &VisitedRegs,
1466
- const LSRUse &LU,
1485
+ const LSRUse &LU, bool HardwareLoopProfitable,
1467
1486
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
1468
1487
if (isLoser ())
1469
1488
return ;
@@ -1477,7 +1496,8 @@ void Cost::RateFormula(const Formula &F,
1477
1496
Lose ();
1478
1497
return ;
1479
1498
}
1480
- RatePrimaryRegister (F, ScaledReg, Regs, LoserRegs);
1499
+ RatePrimaryRegister (F, ScaledReg, Regs, LU, HardwareLoopProfitable,
1500
+ LoserRegs);
1481
1501
if (isLoser ())
1482
1502
return ;
1483
1503
}
@@ -1486,7 +1506,8 @@ void Cost::RateFormula(const Formula &F,
1486
1506
Lose ();
1487
1507
return ;
1488
1508
}
1489
- RatePrimaryRegister (F, BaseReg, Regs, LoserRegs);
1509
+ RatePrimaryRegister (F, BaseReg, Regs, LU, HardwareLoopProfitable,
1510
+ LoserRegs);
1490
1511
if (isLoser ())
1491
1512
return ;
1492
1513
}
@@ -2112,6 +2133,7 @@ class LSRInstance {
2112
2133
TTI::AddressingModeKind AMK;
2113
2134
mutable SCEVExpander Rewriter;
2114
2135
bool Changed = false ;
2136
+ bool HardwareLoopProfitable = false ;
2115
2137
2116
2138
/// This is the insert position that the current loop's induction variable
2117
2139
/// increment should be placed. In simple loops, this is the latch block's
@@ -3592,7 +3614,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
3592
3614
if (!VisitedLSRUse.count (LUIdx) && !LF.isUseFullyOutsideLoop (L)) {
3593
3615
Formula F;
3594
3616
F.initialMatch (S, L, SE);
3595
- BaselineCost.RateFormula (F, Regs, VisitedRegs, LU);
3617
+ BaselineCost.RateFormula (F, Regs, VisitedRegs, LU,
3618
+ HardwareLoopProfitable);
3596
3619
VisitedLSRUse.insert (LUIdx);
3597
3620
}
3598
3621
@@ -4730,7 +4753,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4730
4753
// the corresponding bad register from the Regs set.
4731
4754
Cost CostF (L, SE, TTI, AMK);
4732
4755
Regs.clear ();
4733
- CostF.RateFormula (F, Regs, VisitedRegs, LU, &LoserRegs);
4756
+ CostF.RateFormula (F, Regs, VisitedRegs, LU, HardwareLoopProfitable,
4757
+ &LoserRegs);
4734
4758
if (CostF.isLoser ()) {
4735
4759
// During initial formula generation, undesirable formulae are generated
4736
4760
// by uses within other loops that have some non-trivial address mode or
@@ -4763,7 +4787,8 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
4763
4787
4764
4788
Cost CostBest (L, SE, TTI, AMK);
4765
4789
Regs.clear ();
4766
- CostBest.RateFormula (Best, Regs, VisitedRegs, LU);
4790
+ CostBest.RateFormula (Best, Regs, VisitedRegs, LU,
4791
+ HardwareLoopProfitable);
4767
4792
if (CostF.isLess (CostBest))
4768
4793
std::swap (F, Best);
4769
4794
LLVM_DEBUG (dbgs () << " Filtering out formula " ; F.print (dbgs ());
@@ -5021,9 +5046,9 @@ void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
5021
5046
Cost CostFA (L, SE, TTI, AMK);
5022
5047
Cost CostFB (L, SE, TTI, AMK);
5023
5048
Regs.clear ();
5024
- CostFA.RateFormula (FA, Regs, VisitedRegs, LU);
5049
+ CostFA.RateFormula (FA, Regs, VisitedRegs, LU, HardwareLoopProfitable );
5025
5050
Regs.clear ();
5026
- CostFB.RateFormula (FB, Regs, VisitedRegs, LU);
5051
+ CostFB.RateFormula (FB, Regs, VisitedRegs, LU, HardwareLoopProfitable );
5027
5052
return CostFA.isLess (CostFB);
5028
5053
};
5029
5054
@@ -5428,7 +5453,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
5428
5453
// the current best, prune the search at that point.
5429
5454
NewCost = CurCost;
5430
5455
NewRegs = CurRegs;
5431
- NewCost.RateFormula (F, NewRegs, VisitedRegs, LU);
5456
+ NewCost.RateFormula (F, NewRegs, VisitedRegs, LU, HardwareLoopProfitable );
5432
5457
if (NewCost.isLess (SolutionCost)) {
5433
5458
Workspace.push_back (&F);
5434
5459
if (Workspace.size () != Uses.size ()) {
@@ -6133,6 +6158,12 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
6133
6158
L->getHeader ()->printAsOperand (dbgs (), /* PrintType=*/ false );
6134
6159
dbgs () << " :\n " );
6135
6160
6161
+ // Check if we expect this loop to use a hardware loop instruction, which will
6162
+ // be used when calculating the costs of formulas.
6163
+ HardwareLoopInfo HWLoopInfo (L);
6164
+ HardwareLoopProfitable =
6165
+ TTI.isHardwareLoopProfitable (L, SE, AC, &TLI, HWLoopInfo);
6166
+
6136
6167
// Configure SCEVExpander already now, so the correct mode is used for
6137
6168
// isSafeToExpand() checks.
6138
6169
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
0 commit comments