diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc..c5b703282f268 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -82,6 +82,101 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
+static bool isDirectionNegative(std::vector<Dependence::DVEntry> &DV) {
+  for (unsigned Level = 1; Level <= DV.size(); ++Level) {
+    unsigned char Direction = DV[Level - 1].Direction;
+    if (Direction == Dependence::DVEntry::EQ)
+      continue;
+    if (Direction == Dependence::DVEntry::GT ||
+        Direction == Dependence::DVEntry::GE)
+      return true;
+    return false;
+  }
+  return false;
+}
+
+static void dumpDirection(raw_ostream &OS,
+                          std::vector<Dependence::DVEntry> &DV) {
+  OS << " [";
+  for (unsigned II = 1; II <= DV.size(); ++II) {
+    unsigned Direction = DV[II - 1].Direction;
+    if (Direction == Dependence::DVEntry::ALL)
+      OS << "*";
+    else {
+      if (Direction & Dependence::DVEntry::LT)
+        OS << "<";
+      if (Direction & Dependence::DVEntry::EQ)
+        OS << "=";
+      if (Direction & Dependence::DVEntry::GT)
+        OS << ">";
+    }
+    if (II < DV.size())
+      OS << " ";
+  }
+  OS << "]\n";
+}
+
+// Mark in Loops the depth of each loop that Expr's add-recurrences run over.
+static void getAffectedLoop(const SCEV *Expr, SmallBitVector &Loops,
+                            ScalarEvolution *SE) {
+  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec)
+    return;
+
+  unsigned Depth = AddRec->getLoop()->getLoopDepth();
+  if (Depth < Loops.size())
+    Loops.set(Depth);
+  getAffectedLoop(AddRec->getStart(), Loops, SE);
+  getAffectedLoop(AddRec->getStepRecurrence(*SE), Loops, SE);
+}
+
+// Force EQ for every loop level that affects neither access address.
+static void
+updateUndistributedLoopDirection(std::vector<Dependence::DVEntry> &DV,
+                                 ScalarEvolution *SE, Instruction *Src,
+                                 Instruction *Dst) {
+  SmallBitVector DistributedLoops(DV.size() + 1);
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
+  const SCEV *DstSCEV = SE->getSCEV(DstPtr);
+  getAffectedLoop(SrcSCEV, DistributedLoops, SE);
+  getAffectedLoop(DstSCEV, DistributedLoops, SE);
+  for (unsigned II = 1; II <= DV.size(); ++II)
+    // Force EQ when neither address SCEV varies with this loop.
+    // NOTE(review): confirm '*' -> '=' is sound for such loops.
+    if (!DistributedLoops.test(II)) {
+      LLVM_DEBUG(dbgs() << "Set level " << II << " loop direction to =\n");
+      DV[II - 1].Direction = Dependence::DVEntry::EQ;
+    }
+}
+
+static bool normalize(std::vector<Dependence::DVEntry> &DV, ScalarEvolution *SE,
+                      Instruction *Src, Instruction *Dst) {
+  updateUndistributedLoopDirection(DV, SE, Src, Dst);
+
+  if (!isDirectionNegative(DV))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
+             dumpDirection(dbgs(), DV););
+  for (unsigned Level = 1; Level <= DV.size(); ++Level) {
+    unsigned char Direction = DV[Level - 1].Direction;
+    // Reverse the direction vector: LT becomes GT and GT becomes LT,
+    // while the EQ bit is kept as is.
+    unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
+    if (Direction & Dependence::DVEntry::LT)
+      RevDirection |= Dependence::DVEntry::GT;
+    if (Direction & Dependence::DVEntry::GT)
+      RevDirection |= Dependence::DVEntry::LT;
+    DV[Level - 1].Direction = RevDirection;
+  }
+
+  LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
+             dumpDirection(dbgs(), DV););
+  return true;
+}
+
 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
                                      Loop *L, DependenceInfo *DI,
                                      ScalarEvolution *SE) {
@@ -123,23 +218,28 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
       // Track Output, Flow, and Anti dependencies.
       if (auto D = DI->depends(Src, Dst, true)) {
         assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+
+        unsigned Levels = D->getLevels();
+        std::vector<Dependence::DVEntry> DV(Levels);
+        for (unsigned II = 1; II <= Levels; ++II)
+          DV[II - 1].Direction = D->getDirection(II);
         // If the direction vector is negative, normalize it to
         // make it non-negative.
-        if (D->normalize(SE))
+        if (normalize(DV, SE, Src, Dst))
           LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
-        LLVM_DEBUG(StringRef DepType =
-                       D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
+        LLVM_DEBUG(StringRef DepType = D->isFlow()   ? "flow"
+                                       : D->isAnti() ? "anti"
+                                                     : "output";
                    dbgs() << "Found " << DepType
                           << " dependency between Src and Dst\n"
                           << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
-        unsigned Levels = D->getLevels();
         char Direction;
         for (unsigned II = 1; II <= Levels; ++II) {
           if (D->isScalar(II)) {
             Direction = 'S';
             Dep.push_back(Direction);
           } else {
-            unsigned Dir = D->getDirection(II);
+            unsigned Dir = DV[II - 1].Direction;
             if (Dir == Dependence::DVEntry::LT ||
                 Dir == Dependence::DVEntry::LE)
               Direction = '<';
diff --git a/llvm/test/Transforms/LoopInterchange/pr71519.ll b/llvm/test/Transforms/LoopInterchange/pr71519.ll
new file mode 100644
index 0000000000000..e2d34662ef264
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr71519.ll
@@ -0,0 +1,67 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@aa = global [256 x [256 x float]] zeroinitializer, align 64
+@bb = global [256 x [256 x float]] zeroinitializer, align 64
+
+;; This test checks that the direction of the level 1 loop dependence between
+;; aa[j][i] and aa[j - 1][i] is set to '=' instead of the default '*',
+;; because the level 1 loop IV (nl) does not affect the value of aa[j][i] and
+;; aa[j - 1][i].
+;; Setting it to '=' enables normalization and makes the interchange legal.
+;;
+;; for (int nl = 0; nl < 10000000/256; nl++) // Level 1
+;;   for (int i = 0; i < 256; ++i) // Level 2
+;;     for (int j = 1; j < 256; j++) // Level 3
+;;       aa[j][i] = aa[j - 1][i] + bb[j][i];
+
+; CHECK: Set level 1 loop direction to =
+; CHECK: Before normalizing negative direction vectors:
+; CHECK: [= = >]
+; CHECK: After normalizing negative direction vectors:
+; CHECK: [= = <]
+; CHECK: Negative dependence vector normalized.
+; CHECK: Loops interchanged.
+
+define float @s231() {
+entry:
+  br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
+  %nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ]
+  br label %for.cond5.preheader
+
+for.cond.cleanup3: ; preds = %for.cond.cleanup7
+  %inc23 = add nuw nsw i32 %nl.036, 1
+  %exitcond41 = icmp ne i32 %inc23, 39062
+  br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.cond.cleanup7: ; preds = %for.body8
+  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
+  %exitcond40 = icmp ne i64 %indvars.iv.next39, 256
+  br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3
+
+for.body8: ; preds = %for.cond5.preheader, %for.body8
+  %indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ]
+  %0 = add nsw i64 %indvars.iv, -1
+  %arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38
+  %1 = load float, ptr %arrayidx10, align 4
+  %arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+  %2 = load float, ptr %arrayidx14, align 4
+  %add = fadd fast float %2, %1
+  %arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38
+  store float %add, ptr %arrayidx18, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 256
+  br i1 %exitcond, label %for.body8, label %for.cond.cleanup7
+
+for.cond5.preheader: ; preds = %for.cond1.preheader, %for.cond.cleanup7
+  %indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ]
+  br label %for.body8
+
+; Exit blocks
+for.cond.cleanup: ; preds = %for.cond.cleanup3
+  ret float undef
+}