@@ -67,8 +67,6 @@ static cl::opt<unsigned int> MaxMemInstrCount(
6767
6868namespace {
6969
70- using LoopVector = SmallVector<Loop *, 8 >;
71-
7270// TODO: Check if we can use a sparse matrix here.
7371using CharMatrix = std::vector<std::vector<char >>;
7472
@@ -84,6 +82,14 @@ static cl::opt<unsigned int> MaxLoopNestDepth(
8482 " loop-interchange-max-loop-nest-depth" , cl::init(10 ), cl::Hidden,
8583 cl::desc(" Maximum depth of loop nest considered for the transform" ));
8684
85+ // Whether to apply by default.
86+ // TODO: Once this pass is enabled by default, remove this option and use the
87+ // value of PipelineTuningOptions.
88+ static cl::opt<bool > OnlyWhenForced (
89+ " loop-interchange-only-when-forced" , cl::init(false ), cl::ReallyHidden,
90+ cl::desc(
91+ " Apply interchanges only when explicitly specified metadata exists" ));
92+
8793#ifndef NDEBUG
8894static void printDepMatrix (CharMatrix &DepMatrix) {
8995 for (auto &Row : DepMatrix) {
@@ -233,7 +239,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
233239 return true ;
234240}
235241
236- static void populateWorklist (Loop &L, LoopVector &LoopList) {
242+ static void populateWorklist (Loop &L, SmallVectorImpl<Loop *> &LoopList) {
237243 LLVM_DEBUG (dbgs () << " Calling populateWorklist on Func: "
238244 << L.getHeader ()->getParent ()->getName () << " Loop: %"
239245 << L.getHeader ()->getName () << ' \n ' );
@@ -245,7 +251,7 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
245251 // nested.
246252 // Discard all loops above it added into Worklist.
247253 if (Vec->size () != 1 ) {
248- LoopList = {} ;
254+ LoopList. clear () ;
249255 return ;
250256 }
251257
@@ -256,27 +262,6 @@ static void populateWorklist(Loop &L, LoopVector &LoopList) {
256262 LoopList.push_back (CurrentLoop);
257263}
258264
259- static bool hasSupportedLoopDepth (SmallVectorImpl<Loop *> &LoopList,
260- OptimizationRemarkEmitter &ORE) {
261- unsigned LoopNestDepth = LoopList.size ();
262- if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
263- LLVM_DEBUG (dbgs () << " Unsupported depth of loop nest " << LoopNestDepth
264- << " , the supported range is [" << MinLoopNestDepth
265- << " , " << MaxLoopNestDepth << " ].\n " );
266- Loop **OuterLoop = LoopList.begin ();
267- ORE.emit ([&]() {
268- return OptimizationRemarkMissed (DEBUG_TYPE, " UnsupportedLoopNestDepth" ,
269- (*OuterLoop)->getStartLoc (),
270- (*OuterLoop)->getHeader ())
271- << " Unsupported depth of loop nest, the supported range is ["
272- << std::to_string (MinLoopNestDepth) << " , "
273- << std::to_string (MaxLoopNestDepth) << " ].\n " ;
274- });
275- return false ;
276- }
277- return true ;
278- }
279-
280265static bool isComputableLoopNest (ScalarEvolution *SE,
281266 ArrayRef<Loop *> LoopList) {
282267 for (Loop *L : LoopList) {
@@ -299,6 +284,26 @@ static bool isComputableLoopNest(ScalarEvolution *SE,
299284
300285namespace {
301286
287+ // / LoopInterchangeList manages the list of loops and the range to which the
288+ // / interchange may be applied.
289+ struct LoopInterchangeList {
290+ SmallVector<Loop *, 8 > LoopList;
291+ unsigned ListBegin = 0 ;
292+ unsigned ListEnd = 0 ;
293+
294+ LoopInterchangeList (LoopNest &LN)
295+ : LoopList(LN.getLoops()), ListBegin(0 ), ListEnd(LoopList.size()) {}
296+
297+ LoopInterchangeList (Loop &L) {
298+ populateWorklist (L, LoopList);
299+ ListBegin = 0 ;
300+ ListEnd = LoopList.size ();
301+ }
302+
303+ void checkMetadata (bool OnlyWhenForced);
304+ bool hasSupportedLoopDepth (OptimizationRemarkEmitter &ORE);
305+ };
306+
302307// / LoopInterchangeLegality checks if it is legal to interchange the loop.
303308class LoopInterchangeLegality {
304309public:
@@ -439,39 +444,38 @@ struct LoopInterchange {
439444 bool run (Loop *L) {
440445 if (L->getParentLoop ())
441446 return false ;
442- SmallVector<Loop *, 8 > LoopList;
443- populateWorklist (*L, LoopList);
444- return processLoopList (LoopList);
447+ LoopInterchangeList LIL (*L);
448+ return processLoopList (LIL);
445449 }
446450
447- bool run (LoopNest &LN ) {
448- SmallVector<Loop *, 8 > LoopList (LN. getLoops ()) ;
451+ bool run (LoopInterchangeList &LIL ) {
452+ const auto & LoopList = LIL. LoopList ;
449453 for (unsigned I = 1 ; I < LoopList.size (); ++I)
450454 if (LoopList[I]->getParentLoop () != LoopList[I - 1 ])
451455 return false ;
452- return processLoopList (LoopList );
456+ return processLoopList (LIL );
453457 }
454458
455- unsigned selectLoopForInterchange (ArrayRef<Loop *> LoopList ) {
459+ unsigned selectLoopForInterchange (LoopInterchangeList &LIL ) {
456460 // TODO: Add a better heuristic to select the loop to be interchanged based
457461 // on the dependence matrix. Currently we select the innermost loop.
458- return LoopList. size () - 1 ;
462+ return LIL. ListEnd - 1 ;
459463 }
460464
461- bool processLoopList (SmallVectorImpl<Loop *> &LoopList ) {
465+ bool processLoopList (LoopInterchangeList &LIL ) {
462466 bool Changed = false ;
463467
464468 // Ensure proper loop nest depth.
465- assert (hasSupportedLoopDepth (LoopList, *ORE) &&
469+ assert (LIL. hasSupportedLoopDepth (*ORE) &&
466470 " Unsupported depth of loop nest." );
467471
468- unsigned LoopNestDepth = LoopList.size ();
472+ unsigned LoopNestDepth = LIL. LoopList .size ();
469473
470474 LLVM_DEBUG (dbgs () << " Processing LoopList of size = " << LoopNestDepth
471475 << " \n " );
472476
473477 CharMatrix DependencyMatrix;
474- Loop *OuterMostLoop = *(LoopList.begin ());
478+ Loop *OuterMostLoop = *(LIL. LoopList .begin ());
475479 if (!populateDependencyMatrix (DependencyMatrix, LoopNestDepth,
476480 OuterMostLoop, DI, SE, ORE)) {
477481 LLVM_DEBUG (dbgs () << " Populating dependency matrix failed\n " );
@@ -488,7 +492,7 @@ struct LoopInterchange {
488492 return false ;
489493 }
490494
491- unsigned SelecLoopId = selectLoopForInterchange (LoopList );
495+ unsigned SelectLoopId = selectLoopForInterchange (LIL );
492496 // Obtain the loop vector returned from loop cache analysis beforehand,
493497 // and put each <Loop, index> pair into a map for constant time query
494498 // later. Indices in loop vector represent the optimal order of the
@@ -504,19 +508,20 @@ struct LoopInterchange {
504508 CostMap[LoopCosts[i].first ] = i;
505509 }
506510 }
511+
507512 // We try to achieve the globally optimal memory access for the loopnest,
508513 // and do interchange based on a bubble-sort fashion. We start from
509514 // the innermost loop, move it outwards to the best possible position
510515 // and repeat this process.
511- for (unsigned j = SelecLoopId ; j > 0 ; j--) {
516+ for (unsigned j = LIL. ListEnd - LIL. ListBegin - 1 ; j > 0 ; j--) {
512517 bool ChangedPerIter = false ;
513- for (unsigned i = SelecLoopId ; i > SelecLoopId - j; i--) {
514- bool Interchanged = processLoop (LoopList[i], LoopList[i - 1 ], i, i - 1 ,
515- DependencyMatrix, CostMap);
518+ for (unsigned i = SelectLoopId ; i > SelectLoopId - j; i--) {
519+ bool Interchanged = processLoop (LIL. LoopList [i], LIL. LoopList [i - 1 ], i,
520+ i - 1 , DependencyMatrix, CostMap);
516521 if (!Interchanged)
517522 continue ;
518523 // Loops interchanged, update LoopList accordingly.
519- std::swap (LoopList[i - 1 ], LoopList[i]);
524+ std::swap (LIL. LoopList [i - 1 ], LIL. LoopList [i]);
520525 // Update the DependencyMatrix
521526 interChangeDependencies (DependencyMatrix, i, i - 1 );
522527
@@ -526,6 +531,7 @@ struct LoopInterchange {
526531 ChangedPerIter |= Interchanged;
527532 Changed |= Interchanged;
528533 }
534+
529535 // Early abort if there was no interchange during an entire round of
530536 // moving loops outwards.
531537 if (!ChangedPerIter)
@@ -572,6 +578,69 @@ struct LoopInterchange {
572578
573579} // end anonymous namespace
574580
581+ bool LoopInterchangeList::hasSupportedLoopDepth (
582+ OptimizationRemarkEmitter &ORE) {
583+ unsigned LoopNestDepth = ListEnd - ListBegin;
584+ if (LoopNestDepth < MinLoopNestDepth || LoopNestDepth > MaxLoopNestDepth) {
585+ LLVM_DEBUG (dbgs () << " Unsupported depth of loop nest " << LoopNestDepth
586+ << " , the supported range is [" << MinLoopNestDepth
587+ << " , " << MaxLoopNestDepth << " ].\n " );
588+ Loop *OuterLoop = LoopList[ListBegin];
589+ ORE.emit ([&]() {
590+ return OptimizationRemarkMissed (DEBUG_TYPE, " UnsupportedLoopNestDepth" ,
591+ OuterLoop->getStartLoc (),
592+ OuterLoop->getHeader ())
593+ << " Unsupported depth of loop nest, the supported range is ["
594+ << std::to_string (MinLoopNestDepth) << " , "
595+ << std::to_string (MaxLoopNestDepth) << " ].\n " ;
596+ });
597+ return false ;
598+ }
599+ return true ;
600+ }
601+
602+ // Check the metadata for interchange. The outermost one is taken into account
603+ // and nested ones are ignored. The metadata affects the entire loop nest such
604+ // that the outermost loop is the loop for which the metadata is specified. For
605+ // example, in the following example, the loop-interchange will be performed
606+ // only to the outermost two loops.
607+ //
608+ // for (...)
609+ // for (...)
610+ // #pragma clang loop interchange(disable)
611+ // for (...)
612+ // for (...)
613+ // for (...)
614+ // Stmt
615+ //
616+ void LoopInterchangeList::checkMetadata (bool OnlyWhenForced) {
617+ ListBegin = 0 ;
618+ ListEnd = LoopList.size ();
619+
620+ for (unsigned I = 0 ; I != LoopList.size (); I++) {
621+ Loop *L = LoopList[I];
622+ auto Value = findStringMetadataForLoop (L, " llvm.loop.interchange.enable" );
623+ if (!Value)
624+ continue ;
625+
626+ const MDOperand *Op = *Value;
627+ assert (Op && mdconst::hasa<ConstantInt>(*Op) && " invalid metadata" );
628+ bool Enabled = mdconst::extract<ConstantInt>(*Op)->getZExtValue ();
629+ if (Enabled && OnlyWhenForced) {
630+ ListBegin = I;
631+ } else if (!Enabled && !OnlyWhenForced) {
632+ ListEnd = I;
633+ } else if (OnlyWhenForced) {
634+ ListEnd = 0 ;
635+ }
636+ break ;
637+ }
638+
639+ LLVM_DEBUG (
640+ dbgs () << " LoopInterchange will be applied to the range: [from, to]=["
641+ << ListBegin << " , " << ListEnd - 1 << " ]\n " ;);
642+ }
643+
575644bool LoopInterchangeLegality::containsUnsafeInstructions (BasicBlock *BB) {
576645 return any_of (*BB, [](const Instruction &I) {
577646 return I.mayHaveSideEffects () || I.mayReadFromMemory ();
@@ -1748,7 +1817,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17481817 LoopStandardAnalysisResults &AR,
17491818 LPMUpdater &U) {
17501819 Function &F = *LN.getParent ();
1751- SmallVector<Loop *, 8 > LoopList (LN. getLoops () );
1820+ LoopInterchangeList LIL (LN);
17521821
17531822 if (MaxMemInstrCount < 1 ) {
17541823 LLVM_DEBUG (dbgs () << " MaxMemInstrCount should be at least 1" );
@@ -1757,14 +1826,19 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17571826 OptimizationRemarkEmitter ORE (&F);
17581827
17591828 // Ensure minimum depth of the loop nest to do the interchange.
1760- if (!hasSupportedLoopDepth (LoopList, ORE))
1829+ if (!LIL. hasSupportedLoopDepth (ORE))
17611830 return PreservedAnalyses::all ();
17621831 // Ensure computable loop nest.
1763- if (!isComputableLoopNest (&AR.SE , LoopList)) {
1832+ if (!isComputableLoopNest (&AR.SE , LIL. LoopList )) {
17641833 LLVM_DEBUG (dbgs () << " Not valid loop candidate for interchange\n " );
17651834 return PreservedAnalyses::all ();
17661835 }
17671836
1837+ LIL.checkMetadata (OnlyWhenForced);
1838+ // Ensure the depth again.
1839+ if (!LIL.hasSupportedLoopDepth (ORE))
1840+ return PreservedAnalyses::all ();
1841+
17681842 ORE.emit ([&]() {
17691843 return OptimizationRemarkAnalysis (DEBUG_TYPE, " Dependence" ,
17701844 LN.getOutermostLoop ().getStartLoc (),
@@ -1776,7 +1850,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
17761850 std::unique_ptr<CacheCost> CC =
17771851 CacheCost::getCacheCost (LN.getOutermostLoop (), AR, DI);
17781852
1779- if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LN ))
1853+ if (!LoopInterchange (&AR.SE , &AR.LI , &DI, &AR.DT , CC, &ORE).run (LIL ))
17801854 return PreservedAnalyses::all ();
17811855 U.markLoopNestChanged (true );
17821856 return getLoopPassPreservedAnalyses ();
0 commit comments