@@ -55,6 +55,8 @@ using namespace llvm;
5555// / Metadata attribute names
5656static const char *const LLVMLoopInterchangeFollowupAll =
5757 " llvm.loop.interchange.followup_all" ;
58+ static const char *const LLVMLoopInterchangeFollowupNextOuter =
59+ " llvm.loop.interchange.followup_next_outer" ;
5860static const char *const LLVMLoopInterchangeFollowupOuter =
5961 " llvm.loop.interchange.followup_outer" ;
6062static const char *const LLVMLoopInterchangeFollowupInner =
@@ -533,6 +535,8 @@ struct LoopInterchange {
533535 }
534536 }
535537
538+ // If OnlyWhenForced is true, only process loops for which interchange is
539+ // explicitly enabled.
536540 if (OnlyWhenForced)
537541 return processEnabledLoop (LoopList, DependencyMatrix, CostMap);
538542
@@ -564,8 +568,10 @@ struct LoopInterchange {
564568 Loop *InnerLoop = LoopList[InnerLoopId];
565569 LLVM_DEBUG (dbgs () << " Processing InnerLoopId = " << InnerLoopId
566570 << " and OuterLoopId = " << OuterLoopId << " \n " );
567- if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false )
571+ if (findMetadata (OuterLoop) == false || findMetadata (InnerLoop) == false ) {
572+ LLVM_DEBUG (dbgs () << " Not interchanging loops. It is disabled.\n " );
568573 return false ;
574+ }
569575 LoopInterchangeLegality LIL (OuterLoop, InnerLoop, SE, ORE);
570576 if (!LIL.canInterchangeLoops (InnerLoopId, OuterLoopId, DependencyMatrix)) {
571577 LLVM_DEBUG (dbgs () << " Not interchanging loops. Cannot prove legality.\n " );
@@ -608,41 +614,145 @@ struct LoopInterchange {
608614 std::vector<std::vector<char >> &DependencyMatrix,
609615 const DenseMap<const Loop *, unsigned > &CostMap) {
610616 bool Changed = false ;
611- for (unsigned InnerLoopId = LoopList.size () - 1 ; InnerLoopId > 0 ;
612- InnerLoopId--) {
613- unsigned OuterLoopId = InnerLoopId - 1 ;
614- if (findMetadata (LoopList[OuterLoopId]) != true )
615- continue ;
616617
617- MDNode *MDOrigLoopID = LoopList[OuterLoopId]->getLoopID ();
618- bool Interchanged =
619- processLoop (LoopList[InnerLoopId], LoopList[OuterLoopId], InnerLoopId,
620- OuterLoopId, DependencyMatrix, CostMap);
621-
622- // TODO: Consolidate the duplicate code in `processLoopList`.
623- if (Interchanged) {
624- std::swap (LoopList[OuterLoopId], LoopList[InnerLoopId]);
625- // Update the DependencyMatrix
626- interChangeDependencies (DependencyMatrix, InnerLoopId, OuterLoopId);
618+ // Manage the index so that LoopList[Loop2Index[L]] == L for each loop L.
619+ DenseMap<Loop *, unsigned > Loop2Index;
620+ for (unsigned I = 0 ; I != LoopList.size (); I++)
621+ Loop2Index[LoopList[I]] = I;
622+
623+ // Hold the outer loops to be exchanged (i.e., loops for which
624+ // "llvm.loop.interchange.enable" is true), in the current nest order.
625+ SmallVector<Loop *, 4 > Worklist;
626+
627+ // Helper function to try to add a new loop to the Worklist. Returns false
628+ // if the loops to be interchanged would overlap.
629+ auto AddLoopIfEnabled = [&](Loop *L) {
630+ if (findMetadata (L) == true ) {
631+ if (!Worklist.empty ()) {
632+ // Because the loops are sorted in the order of the current nest, it
633+ // is sufficient to compare with the last element.
634+ unsigned InnerLoopId = Loop2Index[Worklist.back ()] + 1 ;
635+ unsigned OuterLoopId = Loop2Index[L];
636+ if (OuterLoopId <= InnerLoopId) {
637+ ORE->emit ([&]() {
638+ return OptimizationRemarkMissed (DEBUG_TYPE, " AmbiguousOrder" ,
639+ L->getStartLoc (), L->getHeader ())
640+ << " The loops to be interchanged are overlapping." ;
641+ });
642+ return false ;
643+ }
644+ }
645+ Worklist.push_back (L);
646+ }
647+ return true ;
648+ };
627649
628- LLVM_DEBUG (dbgs () << " Dependency matrix after interchange:\n " ;
629- printDepMatrix (DependencyMatrix));
650+ // Initialize Worklist. To process the loops in inner-loop-first order, add
651+ // them to the worklist in the outer-loop-first order.
652+ for (unsigned I = 0 ; I != LoopList.size (); I++)
653+ if (!AddLoopIfEnabled (LoopList[I]))
654+ return Changed;
655+
656+ // Set an upper bound on the number of transformations to avoid an infinite
657+ // loop. There is no deep meaning behind the current value (square of the
658+ // size of LoopList).
659+ // TODO: Is this really necessary?
660+ const unsigned MaxAttemptsCount = LoopList.size () * LoopList.size ();
661+ unsigned Attempts = 0 ;
662+
663+ // Process the loops. An exchange is applied to two loops, but a metadata
664+ // replacement can be applied to three loops: the two loops plus the next
665+ // outer loop, if it exists. This is because it's necessary to express the
666+ // information about the order of the application of interchanges in cases
667+ // where the target loops to be exchanged are overlapping, e.g.,
668+ //
669+ // #pragma clang loop interchange(enable)
670+ // for(int i=0;i<N;i++)
671+ // #pragma clang loop interchange(enable)
672+ // for (int j=0;j<N;j++)
673+ // for (int k=0;k<N;k++)
674+ // ...
675+ //
676+ // In this case, the innermost two loops are exchanged first. The follow-up
677+ // metadata, which includes enabling interchange, is then attached to the
678+ // outermost loop, which is enqueued as the next candidate to be processed.
679+ while (!Worklist.empty () && Attempts < MaxAttemptsCount) {
680+ Loop *TargetLoop = Worklist.pop_back_val ();
681+ assert (findMetadata (TargetLoop) == true &&
682+ " Some metadata was unexpectedlly removed" );
683+ unsigned OuterLoopId = Loop2Index[TargetLoop];
684+ unsigned InnerLoopId = OuterLoopId + 1 ;
685+ if (InnerLoopId >= LoopList.size ()) {
686+ ORE->emit ([&]() {
687+ return OptimizationRemarkMissed (DEBUG_TYPE, " InnermostLoop" ,
688+ TargetLoop->getStartLoc (),
689+ TargetLoop->getHeader ())
690+ << " The metadata is invalid with an innermost loop." ;
691+ });
692+ break ;
693+ }
694+ MDNode *LoopID = TargetLoop->getLoopID ();
695+ bool Interchanged = processLoop (LoopList, InnerLoopId, OuterLoopId,
696+ DependencyMatrix, CostMap);
697+ if (!Interchanged) {
698+ ORE->emit ([&]() {
699+ return OptimizationRemarkMissed (DEBUG_TYPE, " NotInterchanged" ,
700+ TargetLoop->getStartLoc (),
701+ TargetLoop->getHeader ())
702+ << " Failed to perform explicitly specified loop interchange." ;
703+ });
704+ break ;
630705 }
631706
632- std::optional<MDNode *> MDOuterLoopID =
633- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
634- LLVMLoopInterchangeFollowupOuter});
635- if (MDOuterLoopID)
636- LoopList[OuterLoopId]->setLoopID (*MDOuterLoopID);
707+ // The next outer loop, or nullptr if TargetLoop is the outermost one.
708+ Loop *NextOuterLoop = nullptr ;
709+ if (0 < OuterLoopId)
710+ NextOuterLoop = LoopList[OuterLoopId - 1 ];
711+ Loop *OuterLoop = LoopList[OuterLoopId];
712+ Loop *InnerLoop = LoopList[InnerLoopId];
713+ Attempts++;
714+ Changed = true ;
715+ Loop2Index[OuterLoop] = OuterLoopId;
716+ Loop2Index[InnerLoop] = InnerLoopId;
637717
718+ // Update the metadata.
719+ std::optional<MDNode *> MDNextOuterLoopID =
720+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
721+ LLVMLoopInterchangeFollowupNextOuter});
722+ std::optional<MDNode *> MDOuterLoopID =
723+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
724+ LLVMLoopInterchangeFollowupOuter});
638725 std::optional<MDNode *> MDInnerLoopID =
639- makeFollowupLoopID (MDOrigLoopID, {LLVMLoopInterchangeFollowupAll,
640- LLVMLoopInterchangeFollowupInner});
726+ makeFollowupLoopID (LoopID, {LLVMLoopInterchangeFollowupAll,
727+ LLVMLoopInterchangeFollowupInner});
728+ if (MDNextOuterLoopID) {
729+ if (NextOuterLoop) {
730+ NextOuterLoop->setLoopID (*MDNextOuterLoopID);
731+ } else {
732+ LLVM_DEBUG (dbgs ()
733+ << " New metadata for the next outer loop is ignored.\n " );
734+ }
735+ }
736+ if (MDOuterLoopID)
737+ OuterLoop->setLoopID (*MDOuterLoopID);
641738 if (MDInnerLoopID)
642- LoopList[InnerLoopId]->setLoopID (*MDInnerLoopID);
643-
644- Changed |= Interchanged;
739+ InnerLoop->setLoopID (*MDInnerLoopID);
740+
741+ // Add new elements, paying attention to the order.
742+ bool Valid = true ;
743+ if (NextOuterLoop)
744+ Valid &= AddLoopIfEnabled (NextOuterLoop);
745+ Valid &= AddLoopIfEnabled (OuterLoop);
746+ Valid &= AddLoopIfEnabled (InnerLoop);
747+ if (!Valid)
748+ break ;
645749 }
750+
751+ LLVM_DEBUG ({
752+ if (!Worklist.empty ())
753+ dbgs () << " Some metadata was ignored because the maximum number of "
754+ " attempts was reached.\n " ;
755+ });
646756 return Changed;
647757 }
648758};
0 commit comments