@@ -407,6 +407,32 @@ class LoopInterchangeLegality {
407407 SmallVector<PHINode *, 8 > InnerLoopInductions;
408408};
409409
410+ /// Manages information utilized by the profitability check for cache. The main
411+ /// purpose of this class is to delay the computation of CacheCost until it is
412+ /// actually needed.
///
/// The cost is computed at most once, on the first call to getCacheCost() or
/// getCostMap(); both accessors share that single computation.
413+ class CacheCostManager {
// Arguments handed to CacheCost::getCacheCost when the cost is first computed.
// All three are dereferenced at that point, so they must be non-null by then.
414+ Loop *OutermostLoop;
415+ LoopStandardAnalysisResults *AR;
416+ DependenceInfo *DI;
417+
418+ /// CacheCost for \ref OutermostLoop. Once it is computed, it is cached. Note
419+ /// that the result can be nullptr.
// The optional separates "not computed yet" (empty) from "computed, but the
// analysis returned nullptr" (engaged, holding a null pointer).
420+ std::optional<std::unique_ptr<CacheCost>> CC;
421+
422+ /// Maps each loop to an index representing the optimal position within the
423+ /// loop-nest, as determined by the cache cost analysis.
// Filled at the same time as CC; stays empty when the computed CacheCost is
// nullptr.
424+ DenseMap<const Loop *, unsigned > CostMap;
425+
// Computes CC and CostMap on the first call and does nothing on later calls.
426+ void computeIfUnitinialized ();
427+
428+ public:
// Only stores the three pointers; no analysis runs during construction.
429+ CacheCostManager (Loop *OutermostLoop, LoopStandardAnalysisResults *AR,
430+ DependenceInfo *DI)
431+ : OutermostLoop(OutermostLoop), AR(AR), DI(DI) {}
// Returns the cached CacheCost, computing it first if necessary. The result
// may be nullptr, and ownership stays with this manager.
432+ CacheCost *getCacheCost ();
// Returns the loop-to-position map, computing it first if necessary.
433+ const DenseMap<const Loop *, unsigned > &getCostMap ();
434+ };
435+
410436// / LoopInterchangeProfitability checks if it is profitable to interchange the
411437// / loop.
412438class LoopInterchangeProfitability {
@@ -418,15 +444,12 @@ class LoopInterchangeProfitability {
418444 // / Check if the loop interchange is profitable.
419445 bool isProfitable (const Loop *InnerLoop, const Loop *OuterLoop,
420446 unsigned InnerLoopId, unsigned OuterLoopId,
421- CharMatrix &DepMatrix,
422- const DenseMap<const Loop *, unsigned > &CostMap,
423- std::unique_ptr<CacheCost> &CC);
447+ CharMatrix &DepMatrix, CacheCostManager &CCM);
424448
425449private:
426450 int getInstrOrderCost ();
427451 std::optional<bool > isProfitablePerLoopCacheAnalysis (
428- const DenseMap<const Loop *, unsigned > &CostMap,
429- std::unique_ptr<CacheCost> &CC);
452+ const DenseMap<const Loop *, unsigned > &CostMap, CacheCost *CC);
430453 std::optional<bool > isProfitablePerInstrOrderCost ();
431454 std::optional<bool > isProfitableForVectorization (unsigned InnerLoopId,
432455 unsigned OuterLoopId,
@@ -477,15 +500,15 @@ struct LoopInterchange {
477500 LoopInfo *LI = nullptr ;
478501 DependenceInfo *DI = nullptr ;
479502 DominatorTree *DT = nullptr ;
480- std::unique_ptr<CacheCost> CC = nullptr ;
503+ LoopStandardAnalysisResults *AR = nullptr ;
481504
482505 // / Interface to emit optimization remarks.
483506 OptimizationRemarkEmitter *ORE;
484507
485508 LoopInterchange (ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
486- DominatorTree *DT, std::unique_ptr<CacheCost> &CC ,
509+ DominatorTree *DT, LoopStandardAnalysisResults *AR ,
487510 OptimizationRemarkEmitter *ORE)
488- : SE(SE), LI(LI), DI(DI), DT(DT), CC(std::move(CC) ), ORE(ORE) {}
511+ : SE(SE), LI(LI), DI(DI), DT(DT), AR(AR ), ORE(ORE) {}
489512
490513 bool run (Loop *L) {
491514 if (L->getParentLoop ())
@@ -540,19 +563,7 @@ struct LoopInterchange {
540563 }
541564
542565 unsigned SelecLoopId = selectLoopForInterchange (LoopList);
543- // Obtain the loop vector returned from loop cache analysis beforehand,
544- // and put each <Loop, index> pair into a map for constant time query
545- // later. Indices in loop vector reprsent the optimal order of the
546- // corresponding loop, e.g., given a loopnest with depth N, index 0
547- // indicates the loop should be placed as the outermost loop and index N
548- // indicates the loop should be placed as the innermost loop.
549- //
550- // For the old pass manager CacheCost would be null.
551- DenseMap<const Loop *, unsigned > CostMap;
552- if (CC != nullptr ) {
553- for (const auto &[Idx, Cost] : enumerate(CC->getLoopCosts ()))
554- CostMap[Cost.first ] = Idx;
555- }
566+ CacheCostManager CCM (LoopList[0 ], AR, DI);
556567 // We try to achieve the globally optimal memory access for the loopnest,
557568 // and do interchange based on a bubble-sort fashion. We start from
558569 // the innermost loop, move it outwards to the best possible position
@@ -561,7 +572,7 @@ struct LoopInterchange {
561572 bool ChangedPerIter = false ;
562573 for (unsigned i = SelecLoopId; i > SelecLoopId - j; i--) {
563574 bool Interchanged =
564- processLoop (LoopList, i, i - 1 , DependencyMatrix, CostMap );
575+ processLoop (LoopList, i, i - 1 , DependencyMatrix, CCM );
565576 ChangedPerIter |= Interchanged;
566577 Changed |= Interchanged;
567578 }
@@ -576,7 +587,7 @@ struct LoopInterchange {
576587 bool processLoop (SmallVectorImpl<Loop *> &LoopList, unsigned InnerLoopId,
577588 unsigned OuterLoopId,
578589 std::vector<std::vector<char >> &DependencyMatrix,
579- const DenseMap< const Loop *, unsigned > &CostMap ) {
590+ CacheCostManager &CCM ) {
580591 Loop *OuterLoop = LoopList[OuterLoopId];
581592 Loop *InnerLoop = LoopList[InnerLoopId];
582593 LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
@@ -589,7 +600,7 @@ struct LoopInterchange {
589600 LLVM_DEBUG (dbgs () << " Loops are legal to interchange\n " );
590601 LoopInterchangeProfitability LIP (OuterLoop, InnerLoop, SE, ORE);
591602 if (!LIP.isProfitable (InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
592- DependencyMatrix, CostMap, CC )) {
603+ DependencyMatrix, CCM )) {
593604 LLVM_DEBUG (dbgs () << " Interchanging loops not profitable.\n " );
594605 return false ;
595606 }
@@ -1122,6 +1133,35 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
11221133 return true ;
11231134}
11241135
/// Computes the CacheCost for OutermostLoop and fills CostMap, unless that has
/// already been done. An engaged CC counts as "done" even when the stored
/// pointer is null, so the analysis is requested at most once.
1136+ void CacheCostManager::computeIfUnitinialized () {
1137+ if (CC.has_value ())
1138+ return ;
1139+
1140+ LLVM_DEBUG (dbgs () << " Compute CacheCost.\n " );
1141+ CC = CacheCost::getCacheCost (*OutermostLoop, *AR, *DI);
1142+ // Obtain the loop vector returned from loop cache analysis beforehand,
1143+ // and put each <Loop, index> pair into a map for constant time query
1144+ // later. Indices in loop vector represent the optimal order of the
1145+ // corresponding loop, e.g., given a loopnest with depth N, index 0
1146+ // indicates the loop should be placed as the outermost loop and index N
1147+ // indicates the loop should be placed as the innermost loop.
// NOTE(review): the indices come from enumerate(), so the innermost loop of a
// depth-N nest presumably gets index N - 1 rather than N -- confirm.
1148+ //
1149+ // For the old pass manager CacheCost would be null.
// When the result is null, CostMap is simply left empty.
1150+ if (*CC != nullptr )
1151+ for (const auto &[Idx, Cost] : enumerate((*CC)->getLoopCosts ()))
1152+ CostMap[Cost.first ] = Idx;
1153+ }
1154+
/// Returns the CacheCost for the managed loop nest, running the analysis on
/// the first request. The returned pointer is non-owning (the manager keeps
/// the unique_ptr) and may be nullptr when the analysis produced no result.
1155+ CacheCost *CacheCostManager::getCacheCost () {
1156+ computeIfUnitinialized ();
// CC is engaged after the call above, so this reads the stored unique_ptr.
1157+ return CC->get ();
1158+ }
1159+
/// Returns the map from each loop to its position index in the order reported
/// by the cache cost analysis, running the analysis on the first request. The
/// map is empty when the analysis produced no CacheCost.
1160+ const DenseMap<const Loop *, unsigned > &CacheCostManager::getCostMap () {
1161+ computeIfUnitinialized ();
1162+ return CostMap;
1163+ }
1164+
11251165int LoopInterchangeProfitability::getInstrOrderCost () {
11261166 unsigned GoodOrder, BadOrder;
11271167 BadOrder = GoodOrder = 0 ;
@@ -1177,8 +1217,7 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
11771217
11781218std::optional<bool >
11791219LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis (
1180- const DenseMap<const Loop *, unsigned > &CostMap,
1181- std::unique_ptr<CacheCost> &CC) {
1220+ const DenseMap<const Loop *, unsigned > &CostMap, CacheCost *CC) {
11821221 // This is the new cost model returned from loop cache analysis.
11831222 // A smaller index means the loop should be placed an outer loop, and vice
11841223 // versa.
@@ -1246,9 +1285,7 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
12461285
12471286bool LoopInterchangeProfitability::isProfitable (
12481287 const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1249- unsigned OuterLoopId, CharMatrix &DepMatrix,
1250- const DenseMap<const Loop *, unsigned > &CostMap,
1251- std::unique_ptr<CacheCost> &CC) {
1288+ unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
12521289 // isProfitable() is structured to avoid endless loop interchange. If the
12531290 // highest priority rule (isProfitablePerLoopCacheAnalysis by default) could
12541291 // decide the profitability then, profitability check will stop and return the
@@ -1261,9 +1298,12 @@ bool LoopInterchangeProfitability::isProfitable(
12611298 std::optional<bool > shouldInterchange;
12621299 for (RuleTy RT : Profitabilities) {
12631300 switch (RT) {
1264- case RuleTy::PerLoopCacheAnalysis:
1301+ case RuleTy::PerLoopCacheAnalysis: {
1302+ CacheCost *CC = CCM.getCacheCost ();
1303+ const DenseMap<const Loop *, unsigned > &CostMap = CCM.getCostMap ();
12651304 shouldInterchange = isProfitablePerLoopCacheAnalysis (CostMap, CC);
12661305 break ;
1306+ }
12671307 case RuleTy::PerInstrOrderCost:
12681308 shouldInterchange = isProfitablePerInstrOrderCost ();
12691309 break ;
@@ -1841,10 +1881,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
18411881 });
18421882
18431883 DependenceInfo DI (&F, &AR.AA , &AR.SE , &AR.LI );
1844- std::unique_ptr<CacheCost> CC =
1845- CacheCost::getCacheCost (LN.getOutermostLoop (), AR, DI);
1846-
1847- if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LN))
1884+ if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , &AR, &ORE).run (LN))
18481885 return PreservedAnalyses::all ();
18491886 U.markLoopNestChanged (true );
18501887 return getLoopPassPreservedAnalyses ();
0 commit comments