llvm · ShivaChen · Jan 8, 2024 · Jan 26, 2024 · Jan 26, 2024 · Jan 26, 2024
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -82,6 +82,101 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
+// Return true when the first non-'=' entry of DV, scanning from the outermost
+// level inwards, is '>' or '>='; an all-'=' or empty vector is not negative.
+static bool isDirectionNegative(std::vector<Dependence::DVEntry> &DV) {
+  for (const Dependence::DVEntry &Entry : DV) {
+    const unsigned char Dir = Entry.Direction;
+    if (Dir == Dependence::DVEntry::EQ)
+      continue;
+    // The outermost level that is not exactly '=' decides the sign.
+    return Dir == Dependence::DVEntry::GT || Dir == Dependence::DVEntry::GE;
+  }
+  return false;
+}
+
+// Print DV as " [e1 e2 ...]\n": '*' for ALL, else one char per set <,=,> bit.
+static void dumpDirection(raw_ostream &OS,
+                          std::vector<Dependence::DVEntry> &DV) {
+  OS << " [";
+  for (unsigned Idx = 0, End = DV.size(); Idx != End; ++Idx) {
+    OS << (Idx != 0 ? " " : ""); // One space between entries, none at ends.
+    const unsigned Dir = DV[Idx].Direction;
+    if (Dir == Dependence::DVEntry::ALL) {
+      OS << "*";
+      continue;
+    }
+    if (Dir & Dependence::DVEntry::LT)
+      OS << "<";
+    if (Dir & Dependence::DVEntry::EQ)
+      OS << "=";
+    if (Dir & Dependence::DVEntry::GT)
+      OS << ">";
+  }
+  OS << "]\n";
+}
+
+// Record in Loops the depth of each loop with an add-recurrence anywhere in
+// Expr. All operands are walked so that recurrences wrapped in other
+// expressions (casts, adds, muls, ...) are found, not only a top-level one.
+static void getAffectedLoop(const SCEV *Expr, SmallBitVector &Loops,
+                            ScalarEvolution *SE) {
+  if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Expr)) {
+    const unsigned Depth = AddRec->getLoop()->getLoopDepth();
+    if (Depth < Loops.size()) // Depths beyond the tracked levels have no bit.
+      Loops.set(Depth);
+  }
+  for (const SCEV *Op : Expr->operands()) // Start/step for an AddRec.
+    getAffectedLoop(Op, Loops, SE);
+}
+
+// Overwrite with '=' the direction of each level whose depth getAffectedLoop
+// does not report for the pointer operand of Src or of Dst.
+static void
+updateUndistributedLoopDirection(std::vector<Dependence::DVEntry> &DV,
+                                 ScalarEvolution *SE, Instruction *Src,
+                                 Instruction *Dst) {
+  // Bit N stands for loop depth N; levels start at 1, so bit 0 stays unused.
+  const unsigned NumLevels = DV.size();
+  SmallBitVector UsedDepths(NumLevels + 1);
+  for (Instruction *MemInst : {Src, Dst})
+    getAffectedLoop(SE->getSCEV(getLoadStorePointerOperand(MemInst)),
+                    UsedDepths, SE);
+
+  for (unsigned Depth = 1; Depth <= NumLevels; ++Depth) {
+    if (UsedDepths.test(Depth))
+      continue;
+    LLVM_DEBUG(dbgs() << "Set level " << Depth << " loop direction to =\n");
+    DV[Depth - 1].Direction = Dependence::DVEntry::EQ;
+  }
+}
+
+// Bring DV into canonical form for the Src/Dst pair. The per-level fix-up in
+// updateUndistributedLoopDirection runs first (and may change DV even when
+// false is returned); if DV is then negative, every entry is mirrored.
+// Returns true iff DV was mirrored.
+static bool normalize(std::vector<Dependence::DVEntry> &DV, ScalarEvolution *SE,
+                      Instruction *Src, Instruction *Dst) {
+  updateUndistributedLoopDirection(DV, SE, Src, Dst);
+  if (!isDirectionNegative(DV))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
+             dumpDirection(dbgs(), DV););
+  for (Dependence::DVEntry &Entry : DV) {
+    const unsigned char Old = Entry.Direction;
+    // Start from the '=' bit, then add the opposite of each strict bit.
+    unsigned char Mirrored = Old & Dependence::DVEntry::EQ;
+    Mirrored |= (Old & Dependence::DVEntry::LT) ? Dependence::DVEntry::GT : 0;
+    Mirrored |= (Old & Dependence::DVEntry::GT) ? Dependence::DVEntry::LT : 0;
+    Entry.Direction = Mirrored;
+  }
+
+  LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
+             dumpDirection(dbgs(), DV););
+  return true;
+}
+
 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
                                      Loop *L, DependenceInfo *DI,
                                      ScalarEvolution *SE) {
@@ -123,23 +218,28 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
       // Track Output, Flow, and Anti dependencies.
       if (auto D = DI->depends(Src, Dst, true)) {
         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+        unsigned Levels = D->getLevels();
+        std::vector<Dependence::DVEntry> DV(Levels);
+        for (unsigned II = 1; II <= Levels; ++II)
+          DV[II - 1].Direction = D->getDirection(II);
         // If the direction vector is negative, normalize it to
         // make it non-negative.
-        if (D->normalize(SE))
+        if (normalize(DV, SE, Src, Dst))
           LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
-        LLVM_DEBUG(StringRef DepType =
-                       D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
+        LLVM_DEBUG(StringRef DepType = D->isFlow()   ? "flow"
+                                       : D->isAnti() ? "anti"
+                                                     : "output";
                    dbgs() << "Found " << DepType
                           << " dependency between Src and Dst\n"
                           << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
-        unsigned Levels = D->getLevels();
         char Direction;
         for (unsigned II = 1; II <= Levels; ++II) {
           if (D->isScalar(II)) {
             Direction = 'S';
             Dep.push_back(Direction);
           } else {
-            unsigned Dir = D->getDirection(II);
+            unsigned Dir = DV[II - 1].Direction;
             if (Dir == Dependence::DVEntry::LT ||
                 Dir == Dependence::DVEntry::LE)
               Direction = '<';

diff --git a/llvm/test/Transforms/LoopInterchange/pr71519.ll b/llvm/test/Transforms/LoopInterchange/pr71519.ll
@@ -0,0 +1,67 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@aa = global [256 x [256 x float]] zeroinitializer, align 64
+@bb = global [256 x [256 x float]] zeroinitializer, align 64
+
+;; This test checks that the level 1 direction of the dependence between
+;; aa[j][i] and aa[j - 1][i] is set to '=' instead of the default '*',
+;; because the level 1 loop IV (nl) does not affect the address of aa[j][i]
+;; or aa[j - 1][i].
+;; Setting it to '=' enables normalization and makes the interchange legal.
+;;
+;;  for (int nl = 0; nl < 10000000/256; nl++) // Level 1
+;;    for (int i = 0; i < 256; ++i)           // Level 2
+;;      for (int j = 1; j < 256; j++)         // Level 3
+;;        aa[j][i] = aa[j - 1][i] + bb[j][i];
+
+; CHECK: Set level 1 loop direction to =
+; CHECK: Before normalizing negative direction vectors:
+; CHECK:  [= = >]
+; CHECK: After normalizing negative direction vectors:
+; CHECK:  [= = <]
+; CHECK: Negative dependence vector normalized.
+; CHECK: Loops interchanged.
+
+define float @s231() {
+entry:
+  br label %for.cond1.preheader
+
+; Level 1 loop (nl): header block.
+for.cond1.preheader:                              ; preds = %entry, %for.cond.cleanup3
+  %nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ] ; nl
+  br label %for.cond5.preheader
+
+for.cond.cleanup3:                                ; preds = %for.cond.cleanup7
+  %inc23 = add nuw nsw i32 %nl.036, 1 ; Level 1 latch: ++nl
+  %exitcond41 = icmp ne i32 %inc23, 39062
+  br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7:                                ; preds = %for.body8
+  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1 ; Level 2 latch: ++i
+  %exitcond40 = icmp ne i64 %indvars.iv.next39, 256
+  br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.body8:                                        ; preds = %for.cond5.preheader, %for.body8
+  %indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ] ; j
+  %0 = add nsw i64 %indvars.iv, -1 ; j - 1
+  %arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38 ; &aa[j - 1][i]
+  %1 = load float, ptr %arrayidx10, align 4
+  %arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38 ; &bb[j][i]
+  %2 = load float, ptr %arrayidx14, align 4
+  %add = fadd fast float %2, %1
+  %arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38 ; &aa[j][i]
+  store float %add, ptr %arrayidx18, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; Level 3 latch: ++j
+  %exitcond = icmp ne i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.body8, label %for.cond.cleanup7
+
+for.cond5.preheader:                              ; preds = %for.cond1.preheader, %for.cond.cleanup7
+  %indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ] ; i
+  br label %for.body8
+
+; Exit blocks
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  ret float undef
+}