@@ -55,6 +55,8 @@ using namespace llvm;
5555// / Metadata attribute names
5656static const char *const LLVMLoopInterchangeFollowupAll =
5757 " llvm.loop.interchange.followup_all" ;
58+ static const char *const LLVMLoopInterchangeFollowupNextOuter =
59+ " llvm.loop.interchange.followup_next_outer" ;
5860static const char *const LLVMLoopInterchangeFollowupOuter =
5961 " llvm.loop.interchange.followup_outer" ;
6062static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
533535 }
534536 }
535537
538+ // If OnlyWhenForced is true, only process loops for which interchange is
539+ // explicitly enabled.
536540 if (OnlyWhenForced)
537541 return processEnabledLoop (LoopList, DependencyMatrix, CostMap);
538542
@@ -564,8 +568,10 @@ struct LoopInterchange {
564568 Loop *InnerLoop = LoopList[InnerLoopId];
565569 LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
566570 << " and OuterLoopId = " << OuterLoopId << " \n " );
567- if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false )
571+ if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false ) {
572+ LLVM_DEBUG (dbgs () << " Not interchanging loops. It is disabled.\n " );
568573 return false ;
574+ }
569575 LoopInterchangeLegality LIL (OuterLoop, InnerLoop, SE, ORE);
570576 if (!LIL.canInterchangeLoops (InnerLoopId, OuterLoopId, DependencyMatrix)) {
571577 LLVM_DEBUG (dbgs () << " Not interchanging loops. Cannot prove legality.\n " );
@@ -608,41 +614,144 @@ struct LoopInterchange {
608614 std::vector<std::vector<char >> &DependencyMatrix,
609615 const DenseMap<const Loop *, unsigned > &CostMap) {
610616 bool Changed = false ;
611- for (unsigned InnerLoopId = LoopList.size () - 1 ; InnerLoopId > 0 ;
612- InnerLoopId--) {
613- unsigned OuterLoopId = InnerLoopId - 1 ;
614- if (findMetadata (LoopList[OuterLoopId]) != true )
615- continue ;
616617
617- MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID ();
618- bool Interchanged =
619- processLoop (LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
620- OuterLoopId, DependencyMatrix, CostMap);
621-
622- // TODO: Consolidate the duplicate code in `processLoopList`.
623- if (Interchanged) {
624- std::swap (LoopList[OuterLoopId], LoopList[InnerLoopId]);
625- // Update the DependencyMatrix
626- interChangeDependencies (DependencyMatrix, InnerLoopId, OuterLoopId);
618+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
619+ DenseMap<Loop *, unsigned > Loop2Index;
620+ for (unsigned I = 0 ; I != LoopList.size (); I++)
621+ Loop2Index[LoopList[I]] = I;
622+
623+ // Hold outer loops to be exchanged, in the current nest order.
624+ SmallVector<Loop *, 4 > Worklist;
625+
626+ // Helper function to try to add a new loop into the Worklist. Return false
627+ // if there is a duplicate in the loop to be interchanged.
628+ auto AddLoopIfEnabled = [&](Loop *L) {
629+ if (findMetadata (L) == true ) {
630+ if (!Worklist.empty ()) {
631+ // Because the loops are sorted in the order of the current nest, it
632+ // is sufficient to compare with the last element.
633+ unsigned InnerLoopId = Loop2Index[Worklist.back ()] + 1 ;
634+ unsigned OuterLoopId = Loop2Index[L];
635+ if (OuterLoopId <= InnerLoopId) {
636+ ORE->emit ([&]() {
637+ return OptimizationRemarkMissed (DEBUG_TYPE, " AmbiguousOrder" ,
638+ L->getStartLoc (), L->getHeader ())
639+ << " The loops to be interchanged are overlapping." ;
640+ });
641+ return false ;
642+ }
643+ }
644+ Worklist.push_back (L);
645+ }
646+ return true ;
647+ };
627648
628- LLVM_DEBUG (dbgs () << " Dependency matrix after interchange:\n " ;
629- printDepMatrix (DependencyMatrix));
649+ // Initialize Worklist. To process the loops in inner-loop-first order, add
650+ // them to the worklist in the outer-loop-first order.
651+ for (unsigned I = 0 ; I != LoopList.size (); I++)
652+ if (!AddLoopIfEnabled (LoopList[I]))
653+ return Changed;
654+
655+ // Set an upper bound on the number of transformations to avoid an infinite
656+ // loop. There is no deep meaning behind the current value (square of the
657+ // size of LoopList).
658+ // TODO: Is this really necessary?
659+ const unsigned MaxAttemptsCount = LoopList.size () * LoopList.size ();
660+ unsigned Attempts = 0 ;
661+
662+ // Process the loops. An exchange is applied to two loops, but a metadata
663+ // replacement can be applied to three loops: the two loops plus the next
664+ // outer loop, if it exists. This is because it's necessary to express the
665+ // information about the order of the application of interchanges in cases
666+ // where the target loops to be exchanged are overlapping, e.g.,
667+ //
668+ // #pragma clang loop interchange(enable)
669+ // for(int i=0;i<N;i++)
670+ // #pragma clang loop interchange(enable)
671+ // for (int j=0;j<N;j++)
672+ // for (int k=0;k<N;k++)
673+ // ...
674+ //
675+ // In this case we first exchange the innermost two loops. The follow-up
676+ // metadata, which enables interchange, is then attached to the outermost
677+ // loop, and that loop is enqueued as the next candidate to be processed.
678+ while (!Worklist.empty () && Attempts < MaxAttemptsCount) {
679+ Loop *TargetLoop = Worklist.pop_back_val ();
680+ assert (findMetadata (TargetLoop) == true &&
681+ " Some metadata was unexpectedlly removed" );
682+ unsigned OuterLoopId = Loop2Index[TargetLoop];
683+ unsigned InnerLoopId = OuterLoopId + 1 ;
684+ if (InnerLoopId >= LoopList.size ()) {
685+ ORE->emit ([&]() {
686+ return OptimizationRemarkMissed (DEBUG_TYPE, " InnermostLoop" ,
687+ TargetLoop->getStartLoc (),
688+ TargetLoop->getHeader ())
689+ << " The metadata is invalid with an innermost loop." ;
690+ });
691+ break ;
692+ }
693+ MDNode *LoopID = TargetLoop->getLoopID ();
694+ bool Interchanged = processLoop (LoopList, InnerLoopId, OuterLoopId,
695+ DependencyMatrix, CostMap);
696+ if (!Interchanged) {
697+ ORE->emit ([&]() {
698+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotInterchanged" ,
699+ TargetLoop->getStartLoc (),
700+ TargetLoop->getHeader ())
701+ << " Failed to perform explicitly specified loop interchange." ;
702+ });
703+ break ;
630704 }
631705
632- std::optional<MDNode *> MDOuterLoopID =
633- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
634- LLVMLoopInterchangeFollowupOuter});
635- if (MDOuterLoopID)
636- LoopList[OuterLoopId]->setLoopID (*MDOuterLoopID);
706+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
707+ Loop *NextOuterLoop = nullptr ;
708+ if (0 < OuterLoopId)
709+ NextOuterLoop = LoopList[OuterLoopId - 1 ];
710+ Loop *OuterLoop = LoopList[OuterLoopId];
711+ Loop *InnerLoop = LoopList[InnerLoopId];
712+ Attempts++;
713+ Changed = true ;
714+ Loop2Index[OuterLoop] = OuterLoopId;
715+ Loop2Index[InnerLoop] = InnerLoopId;
637716
717+ // Update the metadata.
718+ std::optional<MDNode *> MDNextOuterLoopID =
719+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
720+ LLVMLoopInterchangeFollowupNextOuter});
721+ std::optional<MDNode *> MDOuterLoopID =
722+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
723+ LLVMLoopInterchangeFollowupOuter});
638724 std::optional<MDNode *> MDInnerLoopID =
639- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
640- LLVMLoopInterchangeFollowupInner});
725+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
726+ LLVMLoopInterchangeFollowupInner});
727+ if (MDNextOuterLoopID) {
728+ if (NextOuterLoop) {
729+ NextOuterLoop->setLoopID (*MDNextOuterLoopID);
730+ } else {
731+ LLVM_DEBUG (dbgs ()
732+ << " New metadata for the next outer loop is ignored.\n " );
733+ }
734+ }
735+ if (MDOuterLoopID)
736+ OuterLoop->setLoopID (*MDOuterLoopID);
641737 if (MDInnerLoopID)
642- LoopList[InnerLoopId]->setLoopID (*MDInnerLoopID);
643-
644- Changed |= Interchanged;
738+ InnerLoop->setLoopID (*MDInnerLoopID);
739+
740+ // Add new elements, paying attention to the order.
741+ bool Valid = true ;
742+ if (NextOuterLoop)
743+ Valid &= AddLoopIfEnabled (NextOuterLoop);
744+ Valid &= AddLoopIfEnabled (OuterLoop);
745+ Valid &= AddLoopIfEnabled (InnerLoop);
746+ if (!Valid)
747+ break ;
645748 }
749+
750+ LLVM_DEBUG ({
751+ if (!Worklist.empty ())
752+ dbgs () << " Some metadata was ignored because the maximum number of "
753+ " attempts was reached.\n " ;
754+ });
646755 return Changed;
647756 }
648757};
0 commit comments