Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 105 additions & 5 deletions llvm/lib/Transforms/Scalar/LoopInterchange.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,101 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
}
#endif

/// Return true if the direction vector \p DV is negative.
///
/// Levels are scanned from the outermost one inwards and entries that are
/// exactly '=' are skipped. The first remaining entry decides the result: the
/// vector is negative only when that entry is '>' or '>='. A vector made up
/// solely of '=' entries (or an empty vector) is not negative.
static bool isDirectionNegative(std::vector<Dependence::DVEntry> &DV) {
  for (const Dependence::DVEntry &Entry : DV) {
    const unsigned char Dir = Entry.Direction;
    // The first level that is not exactly '=' settles the answer.
    if (Dir != Dependence::DVEntry::EQ)
      return Dir == Dependence::DVEntry::GT || Dir == Dependence::DVEntry::GE;
  }
  return false;
}

static void dumpDirection(raw_ostream &OS,
std::vector<Dependence::DVEntry> &DV) {
OS << " [";
for (unsigned II = 1; II <= DV.size(); ++II) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel that we could start from 0 and avoid subtracting 1 each time we access DV, but I understand that this is the practice in DA.

unsigned Direction = DV[II - 1].Direction;
if (Direction == Dependence::DVEntry::ALL)
OS << "*";
else {
if (Direction & Dependence::DVEntry::LT)
OS << "<";
if (Direction & Dependence::DVEntry::EQ)
OS << "=";
if (Direction & Dependence::DVEntry::GT)
OS << ">";
}
if (II < DV.size())
OS << " ";
}
OS << "]\n";
}

// Get the Loops will affect Expr result.
static void getAffectedLoop(const SCEV *Expr, SmallBitVector &Loops,
ScalarEvolution *SE) {
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
if (!AddRec)
return;

Loops.set(AddRec->getLoop()->getLoopDepth());
const SCEV *Start = AddRec->getStart();
const SCEV *Step = AddRec->getStepRecurrence(*SE);
getAffectedLoop(Start, Loops, SE);
getAffectedLoop(Step, Loops, SE);
}

/// Force the direction of every loop level that does not take part in the
/// address of \p Src or \p Dst to '='.
///
/// The pointer operands of both instructions are converted to SCEVs and
/// getAffectedLoop() marks the loop depths they depend on. Every level
/// 1..DV.size() left unmarked has its entry in \p DV overwritten with EQ.
static void
updateUndistributedLoopDirection(std::vector<Dependence::DVEntry> &DV,
                                 ScalarEvolution *SE, Instruction *Src,
                                 Instruction *Dst) {
  const unsigned NumLevels = DV.size();
  // Indexed by 1-based loop level, hence the extra slot for index 0.
  SmallBitVector UsedLevels(NumLevels + 1);

  const SCEV *SrcAddr = SE->getSCEV(getLoadStorePointerOperand(Src));
  const SCEV *DstAddr = SE->getSCEV(getLoadStorePointerOperand(Dst));
  getAffectedLoop(SrcAddr, UsedLevels, SE);
  getAffectedLoop(DstAddr, UsedLevels, SE);

  for (unsigned Level = 1; Level <= NumLevels; ++Level) {
    // Levels that influence either address keep their computed direction.
    if (UsedLevels.test(Level))
      continue;
    LLVM_DEBUG(dbgs() << "Set level " << Level << " loop direction to =\n");
    DV[Level - 1].Direction = Dependence::DVEntry::EQ;
  }
}

static bool normalize(std::vector<Dependence::DVEntry> &DV, ScalarEvolution *SE,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am learning this transformation and came across your patch.

This function is more or less a duplicate of FullDependence::normalize(), except that it does not update the distance vector. Do you think we should avoid the duplication? And is updating the distance vector really unnecessary?

Instruction *Src, Instruction *Dst) {
updateUndistributedLoopDirection(DV, SE, Src, Dst);

if (!isDirectionNegative(DV))
return false;

LLVM_DEBUG(dbgs() << "Before normalizing negative direction vectors:\n";
dumpDirection(dbgs(), DV););
for (unsigned Level = 1; Level <= DV.size(); ++Level) {
unsigned char Direction = DV[Level - 1].Direction;
// Reverse the direction vector, this means LT becomes GT
// and GT becomes LT.
unsigned char RevDirection = Direction & Dependence::DVEntry::EQ;
if (Direction & Dependence::DVEntry::LT)
RevDirection |= Dependence::DVEntry::GT;
if (Direction & Dependence::DVEntry::GT)
RevDirection |= Dependence::DVEntry::LT;
DV[Level - 1].Direction = RevDirection;
}

LLVM_DEBUG(dbgs() << "After normalizing negative direction vectors:\n";
dumpDirection(dbgs(), DV););
return true;
}

static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
Loop *L, DependenceInfo *DI,
ScalarEvolution *SE) {
Expand Down Expand Up @@ -123,23 +218,28 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
// Track Output, Flow, and Anti dependencies.
if (auto D = DI->depends(Src, Dst, true)) {
assert(D->isOrdered() && "Expected an output, flow or anti dep.");

unsigned Levels = D->getLevels();
std::vector<Dependence::DVEntry> DV(Levels);
for (unsigned II = 1; II <= Levels; ++II)
DV[II - 1].Direction = D->getDirection(II);
// If the direction vector is negative, normalize it to
// make it non-negative.
if (D->normalize(SE))
if (normalize(DV, SE, Src, Dst))
LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
LLVM_DEBUG(StringRef DepType =
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
LLVM_DEBUG(StringRef DepType = D->isFlow() ? "flow"
: D->isAnti() ? "anti"
: "output";
dbgs() << "Found " << DepType
<< " dependency between Src and Dst\n"
<< " Src:" << *Src << "\n Dst:" << *Dst << '\n');
unsigned Levels = D->getLevels();
char Direction;
for (unsigned II = 1; II <= Levels; ++II) {
if (D->isScalar(II)) {
Direction = 'S';
Dep.push_back(Direction);
} else {
unsigned Dir = D->getDirection(II);
unsigned Dir = DV[II - 1].Direction;
if (Dir == Dependence::DVEntry::LT ||
Dir == Dependence::DVEntry::LE)
Direction = '<';
Expand Down
67 changes: 67 additions & 0 deletions llvm/test/Transforms/LoopInterchange/pr71519.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
; REQUIRES: asserts
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 -verify-dom-info -verify-loop-info \
; RUN: -S -debug 2>&1 | FileCheck %s

@aa = global [256 x [256 x float]] zeroinitializer, align 64
@bb = global [256 x [256 x float]] zeroinitializer, align 64

;; This test checks that the level-1 direction of the dependence between
;; aa[j][i] and aa[j - 1][i] is set to '=' instead of the default '*',
;; because the level-1 loop IV (nl) does not affect the addresses of
;; aa[j][i] and aa[j - 1][i].
;; Setting it to '=' allows the direction vector to be normalized, which
;; makes the interchange legal.
;;
;; for (int nl = 0; nl < 10000000/256; nl++) // Level 1
;; for (int i = 0; i < 256; ++i) // Level 2
;; for (int j = 1; j < 256; j++) // Level 3
;; aa[j][i] = aa[j - 1][i] + bb[j][i];

; CHECK: Set level 1 loop direction to =
; CHECK: Before normalizing negative direction vectors:
; CHECK: [= = >]
; CHECK: After normalizing negative direction vectors:
; CHECK: [= = <]
; CHECK: Negative dependence vector normalized.
; CHECK: Loops interchanged.

; Test kernel: a three-deep loop nest that computes
;   aa[j][i] = aa[j - 1][i] + bb[j][i]
; where j is %indvars.iv (innermost loop) and i is %indvars.iv38 (middle
; loop). The outermost counter %nl.036 is not used in any address computation.
define float @s231() {
entry:
br label %for.cond1.preheader

; Loop:
; Outermost loop header: %nl.036 starts at 0.
for.cond1.preheader: ; preds = %entry, %for.cond.cleanup3
%nl.036 = phi i32 [ 0, %entry ], [ %inc23, %for.cond.cleanup3 ]
br label %for.cond5.preheader

; Outermost loop latch: increment %nl.036 and leave once it reaches 39062.
for.cond.cleanup3: ; preds = %for.cond.cleanup7
%inc23 = add nuw nsw i32 %nl.036, 1
%exitcond41 = icmp ne i32 %inc23, 39062
br i1 %exitcond41, label %for.cond1.preheader, label %for.cond.cleanup

; Middle loop latch: increment %indvars.iv38 and leave once it reaches 256.
for.cond.cleanup7: ; preds = %for.body8
%indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
%exitcond40 = icmp ne i64 %indvars.iv.next39, 256
br i1 %exitcond40, label %for.cond5.preheader, label %for.cond.cleanup3

; Innermost loop body: %indvars.iv runs from 1 up to (but excluding) 256.
for.body8: ; preds = %for.cond5.preheader, %for.body8
%indvars.iv = phi i64 [ 1, %for.cond5.preheader ], [ %indvars.iv.next, %for.body8 ]
; Load aa[%indvars.iv - 1][%indvars.iv38].
%0 = add nsw i64 %indvars.iv, -1
%arrayidx10 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %0, i64 %indvars.iv38
%1 = load float, ptr %arrayidx10, align 4
; Load bb[%indvars.iv][%indvars.iv38].
%arrayidx14 = getelementptr inbounds [256 x [256 x float]], ptr @bb, i64 0, i64 %indvars.iv, i64 %indvars.iv38
%2 = load float, ptr %arrayidx14, align 4
%add = fadd fast float %2, %1
; Store the sum to aa[%indvars.iv][%indvars.iv38].
%arrayidx18 = getelementptr inbounds [256 x [256 x float]], ptr @aa, i64 0, i64 %indvars.iv, i64 %indvars.iv38
store float %add, ptr %arrayidx18, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp ne i64 %indvars.iv.next, 256
br i1 %exitcond, label %for.body8, label %for.cond.cleanup7

; Middle loop header: %indvars.iv38 starts at 0.
for.cond5.preheader: ; preds = %for.cond1.preheader, %for.cond.cleanup7
%indvars.iv38 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next39, %for.cond.cleanup7 ]
br label %for.body8

; Exit blocks
; The returned value is undef; no computed value is returned.
for.cond.cleanup: ; preds = %for.cond.cleanup3
ret float undef
}