Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 42 additions & 9 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include <cassert>
#include <map>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include <map>

Maybe unnecessary?

#include <utility>
#include <vector>

Expand Down Expand Up @@ -421,9 +422,11 @@ static bool hasSupportedLoopDepth(ArrayRef<Loop *> LoopList,
}

static bool isComputableLoopNest(ScalarEvolution *SE,
ArrayRef<Loop *> LoopList) {
ArrayRef<Loop *> LoopList,
std::map<const Loop *, const SCEV *> &LoopBTC) {
for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
LoopBTC[L] = ExitCountOuter;
if (isa<SCEVCouldNotCompute>(ExitCountOuter)) {
LLVM_DEBUG(dbgs() << "Couldn't compute backedge count\n");
return false;
Expand Down Expand Up @@ -545,7 +548,8 @@ class LoopInterchangeProfitability {
/// Check if the loop interchange is profitable.
bool isProfitable(const Loop *InnerLoop, const Loop *OuterLoop,
unsigned InnerLoopId, unsigned OuterLoopId,
CharMatrix &DepMatrix, CacheCostManager &CCM);
CharMatrix &DepMatrix, CacheCostManager &CCM,
std::map<const Loop *, const SCEV *> &LoopBTC);

private:
int getInstrOrderCost();
Expand Down Expand Up @@ -602,14 +606,17 @@ struct LoopInterchange {
DependenceInfo *DI = nullptr;
DominatorTree *DT = nullptr;
LoopStandardAnalysisResults *AR = nullptr;

/// Interface to emit optimization remarks.
OptimizationRemarkEmitter *ORE;
// A cache to avoid recalculating the backedge-taken count for a loop.
std::map<const Loop *, const SCEV *> LoopBTC;

LoopInterchange(ScalarEvolution *SE, LoopInfo *LI, DependenceInfo *DI,
DominatorTree *DT, LoopStandardAnalysisResults *AR,
OptimizationRemarkEmitter *ORE)
: SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE) {}
OptimizationRemarkEmitter *ORE,
std::map<const Loop *, const SCEV *> &&LoopBTC)
: SE(SE), LI(LI), DI(DI), DT(DT), AR(AR), ORE(ORE),
LoopBTC(std::move(LoopBTC)) {}

bool run(Loop *L) {
if (L->getParentLoop())
Expand Down Expand Up @@ -701,7 +708,7 @@ struct LoopInterchange {
LLVM_DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE, ORE);
if (!LIP.isProfitable(InnerLoop, OuterLoop, InnerLoopId, OuterLoopId,
DependencyMatrix, CCM)) {
DependencyMatrix, CCM, LoopBTC)) {
LLVM_DEBUG(dbgs() << "Interchanging loops not profitable.\n");
return false;
}
Expand Down Expand Up @@ -1461,7 +1468,30 @@ std::optional<bool> LoopInterchangeProfitability::isProfitableForVectorization(

bool LoopInterchangeProfitability::isProfitable(
const Loop *InnerLoop, const Loop *OuterLoop, unsigned InnerLoopId,
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM) {
unsigned OuterLoopId, CharMatrix &DepMatrix, CacheCostManager &CCM,
std::map<const Loop *, const SCEV *> &LoopBTC) {

auto *InnerBTC = LoopBTC[InnerLoop];
auto *OuterBTC = LoopBTC[OuterLoop];
assert(InnerBTC && OuterBTC &&
"Loop BTC should exist in cache but not found");
// A loop with a backedge that isn't taken, e.g. an unconditional branch
// true, isn't really a loop and we don't want to consider it as a
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a loop by definition, even if BTC=0. I feel this comment is a bit misleading.

// candidate.
// TODO: when interchange is forced, we should probably also allow
// interchange for these loops, and thus this logic should be moved just
// below the cost-model ignore check below. But this check is done first
// to avoid the issue in #163954.
if (InnerBTC && InnerBTC->isZero()) {
LLVM_DEBUG(dbgs() << "Inner loop back-edge isn't taken, rejecting "
"single iteration loop\n");
return false;
}
if (OuterBTC && OuterBTC->isZero()) {
LLVM_DEBUG(dbgs() << "Outer loop back-edge isn't taken, rejecting "
"single iteration loop\n");
return false;
}

// Return true if interchange is forced and the cost-model ignored.
if (Profitabilities.size() == 1 && Profitabilities[0] == RuleTy::Ignore)
Expand Down Expand Up @@ -2098,6 +2128,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LPMUpdater &U) {
Function &F = *LN.getParent();
SmallVector<Loop *, 8> LoopList(LN.getLoops());
std::map<const Loop *, const SCEV *> LoopBTC;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ScalarEvolution caches the results of BTC. In general, it would be better to call getBackedgeTakenCount every time than holding local cache unless there are special circumstances.


if (MaxMemInstrCount < 1) {
LLVM_DEBUG(dbgs() << "MaxMemInstrCount should be at least 1");
Expand All @@ -2109,7 +2140,7 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
if (!hasSupportedLoopDepth(LoopList, ORE))
return PreservedAnalyses::all();
// Ensure computable loop nest.
if (!isComputableLoopNest(&AR.SE, LoopList)) {
if (!isComputableLoopNest(&AR.SE, LoopList, LoopBTC)) {
LLVM_DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
return PreservedAnalyses::all();
}
Expand All @@ -2122,7 +2153,9 @@ PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
});

DependenceInfo DI(&F, &AR.AA, &AR.SE, &AR.LI);
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE).run(LN))
if (!LoopInterchange(&AR.SE, &AR.LI, &DI, &AR.DT, &AR, &ORE,
std::move(LoopBTC))
.run(LN))
return PreservedAnalyses::all();
U.markLoopNestChanged(true);
return getLoopPassPreservedAnalyses();
Expand Down
73 changes: 73 additions & 0 deletions llvm/test/Transforms/LoopInterchange/loopnest-with-outer-btc0.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
; RUN: -pass-remarks-output=%t -pass-remarks='loop-interchange' -S
; RUN: cat %t | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

@D = common global [100 x [100 x [100 x i32]]] zeroinitializer

; Test for interchange in
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a brief description to clarify the purpose of this test? Like "check the loop with BTC=0 is considered unprofitable".

;
; for(int i=0;i<1;i++)
; for(int j=0;j<100;j++)
; for(int k=0;k<100;k++)
; D[i][k][j] = D[i][k][j]+t;
;

; CHECK: --- !Analysis
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Dependence
; CHECK-NEXT: Function: interchange_i_and_j
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Computed dependence info, invoking the transform.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Passed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: Interchanged
; CHECK-NEXT: Function: interchange_i_and_j
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
; CHECK-NEXT: ...
; CHECK-NEXT: --- !Missed
; CHECK-NEXT: Pass: loop-interchange
; CHECK-NEXT: Name: InterchangeNotProfitable
; CHECK-NEXT: Function: interchange_i_and_j
; CHECK-NEXT: Args:
; CHECK-NEXT: - String: Insufficient information to calculate the cost of loop for interchange.
; CHECK-NEXT: ...

define void @interchange_i_and_j(i32 %t){
entry:
br label %outer.header

outer.header:
%i = phi i64 [ 0, %entry ], [ %inc16, %for.inc15 ]
br label %inner1.header

inner1.header:
%j = phi i64 [ 0, %outer.header ], [ %inc13, %for.inc12 ]
br label %inner2.body

inner2.body:
%k = phi i64 [ 0, %inner1.header ], [ %inc, %inner2.body ]
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr @D, i64 0, i64 %i, i64 %k, i64 %j
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
%arrayidx8 = getelementptr inbounds [100 x [100 x [100 x i32]]], ptr @D, i64 0, i64 %i, i64 %k, i64 %j
%arrayidx8 = getelementptr inbounds [100 x [100 x i32]], ptr @D, i64 %i, i64 %k, i64 %j

I remember hearing somewhere that this is the canonical form.

%0 = load i32, ptr %arrayidx8
%add = add nsw i32 %0, %t
store i32 %add, ptr %arrayidx8
%inc = add nuw nsw i64 %k, 1
%exitcond = icmp eq i64 %inc, 100
br i1 %exitcond, label %for.inc12, label %inner2.body

for.inc12:
%inc13 = add nuw nsw i64 %j, 1
%exitcond29 = icmp eq i64 %inc13, 100
br i1 %exitcond29, label %for.inc15, label %inner1.header

for.inc15:
%inc16 = add nuw nsw i64 %i, 1
%exitcond30 = icmp eq i64 %inc16, 1
br i1 %exitcond30, label %for.end17, label %outer.header

for.end17:
ret void
}
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopInterchange/pr43326.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ for.end: ; preds = %for.inc

for.inc10: ; preds = %for.end
%j.next = add i8 %j, -1
%cmp = icmp sgt i8 %j.next, -1
%cmp = icmp sgt i8 %j.next, -10
br i1 %cmp, label %inner1.header, label %for.end11

for.end11: ; preds = %for.inc10
Expand All @@ -75,8 +75,8 @@ for.end11: ; preds = %for.inc10

for.inc12: ; preds = %for.end11
%inc13 = add nsw i32 %inc1312, 1
%tobool.not = icmp eq i32 %inc13, 0
br i1 %tobool.not, label %for.cond.for.end14_crit_edge, label %outer.header
%tobool.not = icmp slt i32 %inc13, 42
br i1 %tobool.not, label %outer.header, label %for.cond.for.end14_crit_edge

for.cond.for.end14_crit_edge: ; preds = %for.inc12
%inc13.lcssa = phi i32 [ %inc13, %for.inc12 ]
Expand Down
Loading
Loading