From f3698af4ea554ec6abedf0bde099237fe3227247 Mon Sep 17 00:00:00 2001
From: Shiva Chen
Date: Tue, 23 Jan 2024 07:44:54 +0000
Subject: [PATCH 1/2] Add pr54176.ll

---
 .../Transforms/LoopInterchange/pr54176.ll     | 52 +++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopInterchange/pr54176.ll

diff --git a/llvm/test/Transforms/LoopInterchange/pr54176.ll b/llvm/test/Transforms/LoopInterchange/pr54176.ll
new file mode 100644
index 0000000000000..6047ece119f1b
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/pr54176.ll
@@ -0,0 +1,52 @@
+; REQUIRES: asserts
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
+; RUN:     -S -debug 2>&1 | FileCheck %s
+
+@bb = global [1024 x [128 x float]] zeroinitializer, align 4
+@aa = global [1024 x [128 x float]] zeroinitializer, align 4
+@cc = global [1024 x [128 x float]] zeroinitializer, align 4
+
+
+;; for (int j = 1; j < M; j++)
+;;   for (int i = 1; i < N; i++) {
+;;     aa[1][j-1] += bb[i][j];
+;;     cc[i][j] = aa[1][j];
+;;   }
+
+; CHECK: Loops interchanged.
+
+define void @pr54176() {
+entry:
+  br label %for.cond1.preheader
+
+; Loop:
+for.cond1.preheader:                              ; preds = %entry, %for.cond.cleanup3
+  %indvars.iv28 = phi i64 [ 1, %entry ], [ %indvars.iv.next29, %for.cond.cleanup3 ]
+  %0 = add nsw i64 %indvars.iv28, -1
+  %arrayidx8 = getelementptr inbounds [1024 x [128 x float]], ptr @aa, i64 0, i64 1, i64 %0
+  %arrayidx10 = getelementptr inbounds [1024 x [128 x float]], ptr @aa, i64 0, i64 1, i64 %indvars.iv28
+  br label %for.body4
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  %indvars.iv.next29 = add nuw nsw i64 %indvars.iv28, 1
+  %exitcond31 = icmp ne i64 %indvars.iv.next29, 128
+  br i1 %exitcond31, label %for.cond1.preheader, label %for.cond.cleanup
+
+for.body4:                                        ; preds = %for.cond1.preheader, %for.body4
+  %indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
+  %arrayidx6 = getelementptr inbounds [1024 x [128 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv28
+  %1 = load float, ptr %arrayidx6, align 4
+  %2 = load float, ptr %arrayidx8, align 4
+  %add = fadd float %1, %2
+  store float %add, ptr %arrayidx8, align 4
+  %3 = load float, ptr %arrayidx10, align 4
+  %arrayidx14 = getelementptr inbounds [1024 x [128 x float]], ptr @cc, i64 0, i64 %indvars.iv, i64 %indvars.iv28
+  store float %3, ptr %arrayidx14, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
+
+; Exit blocks
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  ret void
+}

From c6600b68114e117c10081041aab40075583e156e Mon Sep 17 00:00:00 2001
From: Shiva Chen
Date: Tue, 6 Feb 2024 07:45:46 +0000
Subject: [PATCH 2/2] [LoopInterchange] Prevent interchange for constant
 index with loop carried dependence

---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 48 +++++++++++++++++++
 .../Transforms/LoopInterchange/pr54176.ll     |  3 +-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 277f530ee25fc..f25ff8388baaa 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -82,6 +82,45 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
+/// Count the add recurrences that make up \p Expr.
+///
+/// Returns 0 when \p Expr is not itself a SCEVAddRecExpr. Otherwise returns 1
+/// plus the counts for its start and step, so an add recurrence whose start is
+/// another add recurrence counts as 2. Add recurrences nested under any other
+/// kind of SCEV node are not visited.
+static unsigned getAffectedLoopNum(const SCEV *Expr, ScalarEvolution &SE) {
+  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
+  if (!AddRec)
+    return 0;
+
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *Step = AddRec->getStepRecurrence(SE);
+  return getAffectedLoopNum(Start, SE) + getAffectedLoopNum(Step, SE) + 1;
+}
+
+/// Heuristic check for a pair of memory accesses in which some subscript does
+/// not vary with every loop of the nest.
+///
+/// \p Src and \p Dst are expected to be loads or stores, because their
+/// pointer operands are fetched with getLoadStorePointerOperand. Returns true
+/// when either address contains exactly one add recurrence, or when the two
+/// addresses contain a different number of them; returns false otherwise,
+/// including when neither address contains an add recurrence.
+static bool hasConstantIndex(Instruction *Src, Instruction *Dst,
+                             ScalarEvolution *SE) {
+  Value *PtrSrc = getLoadStorePointerOperand(Src);
+  Value *PtrDst = getLoadStorePointerOperand(Dst);
+  const SCEV *SrcSCEV = SE->getSCEV(PtrSrc);
+  const SCEV *DstSCEV = SE->getSCEV(PtrDst);
+
+  unsigned SrcLoops = getAffectedLoopNum(SrcSCEV, *SE);
+  unsigned DstLoops = getAffectedLoopNum(DstSCEV, *SE);
+  // Loop interchange needs at least two loops. If either address has only one
+  // add recurrence, or the two counts differ, at least one index is constant
+  // in one of the loops.
+  return SrcLoops == 1 || DstLoops == 1 || SrcLoops != DstLoops;
+}
+
 static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
                                      Loop *L, DependenceInfo *DI,
                                      ScalarEvolution *SE) {
@@ -150,6 +189,15 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
               Direction = '=';
             else
               Direction = '*';
+
+            // Check the direction first: it is a cheap comparison, and the
+            // SCEV-based test is only needed for '<' and '>' entries.
+            if ((Direction == '>' || Direction == '<') &&
+                hasConstantIndex(Src, Dst, SE)) {
+              LLVM_DEBUG(dbgs() << "Has constant index with loop carried"
+                                << " dependencies inside loop\n");
+              return false;
+            }
             Dep.push_back(Direction);
           }
         }
diff --git a/llvm/test/Transforms/LoopInterchange/pr54176.ll b/llvm/test/Transforms/LoopInterchange/pr54176.ll
index 6047ece119f1b..63fb9ba3571ab 100644
--- a/llvm/test/Transforms/LoopInterchange/pr54176.ll
+++ b/llvm/test/Transforms/LoopInterchange/pr54176.ll
@@ -13,7 +13,8 @@
 ;;     cc[i][j] = aa[1][j];
 ;;   }
 
-; CHECK: Loops interchanged.
+; CHECK: Has constant index with loop carried dependencies inside loop
+; CHECK: Populating dependency matrix failed
 
 define void @pr54176() {
 entry: