Skip to content

Commit 56c3e9b

Browse files
committed
Don't reject the whole loopnest, fixed up and added test case.
1 parent 5d9326a commit 56c3e9b

File tree

5 files changed

+168
-57
lines changed

5 files changed

+168
-57
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 42 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "llvm/Transforms/Utils/Local.h"
4747
#include "llvm/Transforms/Utils/LoopUtils.h"
4848
#include <cassert>
49+
#include <map>
4950
#include <utility>
5051
#include <vector>
5152

@@ -428,22 +429,15 @@ static bool hasSupportedLoopDepth(ArrayRef<Loop *> LoopList,
428429
}
429430

430431
static bool isComputableLoopNest(ScalarEvolution *SE,
431-
ArrayRef<Loop *> LoopList) {
432+
ArrayRef<Loop *> LoopList,
433+
std::map<const Loop *, const SCEV *> &LoopBTC) {
432434
for (Loop *L : LoopList) {
433435
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
436+
LoopBTC[L] = ExitCountOuter;
434437
if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
435438
LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
436439
return false;
437440
}
438-
// A loop with a backedge that isn't taken, e.g. an unconditional branch
439-
// true, isn't really a loop and we don't want to consider it as a
440-
// candidate.
441-
if (ExitCountOuter && /*SkipLoopsWithZeroBTC && */ExitCountOuter->isZero()) {
442-
LLVM_DEBUG(dbgs() << "The loop back-edge isn't taken, rejecting single "
443-
"iteration loop\n");
444-
LLVM_DEBUG(L->dump());
445-
return false;
446-
}
447441
if (L->getNumBackEdges() != 1) {
448442
LLVM_DEBUG(dbgs() << "NumBackEdges is not equal to 1\n");
449443
return false;
@@ -561,7 +555,8 @@ class LoopInterchangeProfitability {
561555
/// Check if the loop interchange is profitable.
562556
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
563557
unsigned InnerLoopId, unsigned OuterLoopId,
564-
CharMatrix &DepMatrix, CacheCostManager &CCM);
558+
CharMatrix &DepMatrix, CacheCostManager &CCM,
559+
std::map<const Loop *, const SCEV *> &LoopBTC);
565560

566561
private:
567562
int getInstrOrderCost();
@@ -618,14 +613,17 @@ struct LoopInterchange {
618613
DependenceInfo *DI = nullptr;
619614
DominatorTree *DT = nullptr;
620615
LoopStandardAnalysisResults *AR = nullptr;
621-
622616
/// Interface to emit optimization remarks.
623617
OptimizationRemarkEmitter *ORE;
618+
// A cache to avoid recalculating the backedge-taken count for a loop.
619+
std::map<const Loop *, const SCEV *> LoopBTC;
624620

625621
LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
626622
DominatorTree *DT, LoopStandardAnalysisResults *AR,
627-
OptimizationRemarkEmitter *ORE)
628-
: SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
623+
OptimizationRemarkEmitter *ORE,
624+
std::map<const Loop *, const SCEV *> &&LoopBTC)
625+
: SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE),
626+
LoopBTC(std::move(LoopBTC)) {}
629627

630628
bool run(Loop *L) {
631629
if (L->getParentLoop())
@@ -717,7 +715,7 @@ struct LoopInterchange {
717715
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
718716
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
719717
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
720-
DependencyMatrix, CCM)) {
718+
DependencyMatrix, CCM, LoopBTC)) {
721719
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
722720
return false;
723721
}
@@ -1477,7 +1475,30 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(
14771475

14781476
bool LoopInterchangeProfitability::isProfitable(
14791477
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
1480-
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
1478+
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM,
1479+
std::map<const Loop *, const SCEV *> &LoopBTC) {
1480+
1481+
auto *InnerBTC = LoopBTC[InnerLoop];
1482+
auto *OuterBTC = LoopBTC[OuterLoop];
1483+
assert(InnerBTC && OuterBTC &&
1484+
"Loop BTC should exist in cache but not found");
1485+
// A loop with a backedge that isn't taken, e.g. an unconditional branch
1486+
// true, isn't really a loop and we don't want to consider it as a
1487+
// candidate.
1488+
// TODO: when interchange is forced, we should probably also allow
1489+
// interchange for these loops, and thus this logic should be moved just
1490+
// below the cost-model ignore check below. But this check is done first
1491+
// to avoid the issue in #163954.
1492+
if (InnerBTC && InnerBTC->isZero()) {
1493+
LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting "
1494+
"single iteration loop\n");
1495+
return false;
1496+
}
1497+
if (OuterBTC && OuterBTC->isZero()) {
1498+
LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting "
1499+
"single iteration loop\n");
1500+
return false;
1501+
}
14811502

14821503
// Return true if interchange is forced and the cost-model ignored.
14831504
if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
@@ -2114,6 +2135,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
21142135
LPMUpdater &U) {
21152136
Function &F = *LN.getParent();
21162137
SmallVector<Loop *, 8> LoopList(LN.getLoops());
2138+
std::map<const Loop *, const SCEV *> LoopBTC;
21172139

21182140
if (MaxMemInstrCount < 1) {
21192141
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
@@ -2125,7 +2147,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
21252147
if (!hasSupportedLoopDepth(LoopList, ORE))
21262148
return PreservedAnalyses::all();
21272149
// Ensure computable loop nest.
2128-
if (!isComputableLoopNest(&AR.SE, LoopList)) {
2150+
if (!isComputableLoopNest(&AR.SE, LoopList, LoopBTC)) {
21292151
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
21302152
return PreservedAnalyses::all();
21312153
}
@@ -2138,7 +2160,9 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
21382160
});
21392161

21402162
DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
2141-
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
2163+
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE,
2164+
std::move(LoopBTC))
2165+
.run(LN))
21422166
return PreservedAnalyses::all();
21432167
U.markLoopNestChanged(true);
21442168
return getLoopPassPreservedAnalyses();
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
2+
; RUN: -pass-remarks-output=%t -pass-remarks='loop-interchange' -S
3+
; RUN: cat %t | FileCheck %s
4+
5+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
6+
7+
@D = common global [100 x [100 x [100 x i32]]] zeroinitializer
8+
9+
; Test for interchange in
10+
;
11+
; for(int i=0;i<1;i++)
12+
; for(int j=0;j<100;j++)
13+
; for(int k=0;k<100;k++)
14+
; D[i][k][j] = D[i][k][j]+t;
15+
;
16+
17+
; CHECK: --- !Analysis
18+
; CHECK-NEXT: Pass: loop-interchange
19+
; CHECK-NEXT: Name: Dependence
20+
; CHECK-NEXT: Function: interchange_i_and_j
21+
; CHECK-NEXT: Args:
22+
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
23+
; CHECK-NEXT: ...
24+
; CHECK-NEXT: --- !Passed
25+
; CHECK-NEXT: Pass: loop-interchange
26+
; CHECK-NEXT: Name: Interchanged
27+
; CHECK-NEXT: Function: interchange_i_and_j
28+
; CHECK-NEXT: Args:
29+
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
30+
; CHECK-NEXT: ...
31+
; CHECK-NEXT: --- !Missed
32+
; CHECK-NEXT: Pass: loop-interchange
33+
; CHECK-NEXT: Name: InterchangeNotProfitable
34+
; CHECK-NEXT: Function: interchange_i_and_j
35+
; CHECK-NEXT: Args:
36+
; CHECK-NEXT: - String: Insufficient information to calculate the cost of loop for interchange.
37+
; CHECK-NEXT: ...
38+
39+
define void @interchange_i_and_j(i32 %t){
40+
entry:
41+
br label %outer.header
42+
43+
outer.header:
44+
%i = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
45+
br label %inner1.header
46+
47+
inner1.header:
48+
%j = phi i64 [ 0, %outer.header ], [ %inc13, %for.inc12 ]
49+
br label %inner2.body
50+
51+
inner2.body:
52+
%k = phi i64 [ 0, %inner1.header ], [ %inc, %inner2.body ]
53+
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr @D, i64 0, i64 %i, i64 %k, i64 %j
54+
%0 = load i32, ptr %arrayidx8
55+
%add = add nsw i32 %0, %t
56+
store i32 %add, ptr %arrayidx8
57+
%inc = add nuw nsw i64 %k, 1
58+
%exitcond = icmp eq i64 %inc, 100
59+
br i1 %exitcond, label %for.inc12, label %inner2.body
60+
61+
for.inc12:
62+
%inc13 = add nuw nsw i64 %j, 1
63+
%exitcond29 = icmp eq i64 %inc13, 100
64+
br i1 %exitcond29, label %for.inc15, label %inner1.header
65+
66+
for.inc15:
67+
%inc16 = add nuw nsw i64 %i, 1
68+
%exitcond30 = icmp eq i64 %inc16, 1
69+
br i1 %exitcond30, label %for.end17, label %outer.header
70+
71+
for.end17:
72+
ret void
73+
}

llvm/test/Transforms/LoopInterchange/pr43326.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ for.end: ; preds = %for.inc
6464

6565
for.inc10: ; preds = %for.end
6666
%j.next = add i8 %j, -1
67-
%cmp = icmp sgt i8 %j.next, -1
67+
%cmp = icmp sgt i8 %j.next, -10
6868
br i1 %cmp, label %inner1.header, label %for.end11
6969

7070
for.end11: ; preds = %for.inc10
@@ -75,8 +75,8 @@ for.end11: ; preds = %for.inc10
7575

7676
for.inc12: ; preds = %for.end11
7777
%inc13 = add nsw i32 %inc1312, 1
78-
%tobool.not = icmp eq i32 %inc13, 0
79-
br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %outer.header
78+
%tobool.not = icmp slt i32 %inc13, 42
79+
br i1 %tobool.not, label %outer.header, label %for.cond.for.end14_crit_edge
8080

8181
for.cond.for.end14_crit_edge: ; preds = %for.inc12
8282
%inc13.lcssa = phi i32 [ %inc13, %for.inc12 ]

0 commit comments

Comments
 (0)