Skip to content

Commit dac9012

Browse files
committed
[LoopInterchange] Swap Src and Dst to get distance without normalize
1 parent 12cab0e commit dac9012

File tree

3 files changed

+16
-26
lines changed

3 files changed

+16
-26
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/Analysis/DependenceAnalysis.h"
2121
#include "llvm/Analysis/LoopCacheAnalysis.h"
2222
#include "llvm/Analysis/LoopInfo.h"
23+
#include "llvm/Analysis/LoopIterator.h"
2324
#include "llvm/Analysis/LoopNestAnalysis.h"
2425
#include "llvm/Analysis/LoopPass.h"
2526
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -84,13 +85,16 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
8485

8586
static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
8687
Loop *L, DependenceInfo *DI,
87-
ScalarEvolution *SE) {
88+
ScalarEvolution *SE, LoopInfo *LI) {
8889
using ValueVector = SmallVector<Value *, 16>;
8990

9091
ValueVector MemInstr;
9192

92-
// For each block.
93-
for (BasicBlock *BB : L->blocks()) {
93+
// Traverse blocks in fixed RPOT order, regardless of their storage in the
94+
// loop info, as it may be arbitrary.
95+
LoopBlocksRPO RPOT(L);
96+
RPOT.perform(LI);
97+
for (BasicBlock *BB : RPOT) {
9498
// Scan the BB and collect legal loads and stores.
9599
for (Instruction &I : *BB) {
96100
if (!isa<Instruction>(I))
@@ -115,18 +119,14 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
115119
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
116120
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
117121
std::vector<char> Dep;
118-
Instruction *Src = cast<Instruction>(*I);
119-
Instruction *Dst = cast<Instruction>(*J);
122+
Instruction *Src = cast<Instruction>(*J);
123+
Instruction *Dst = cast<Instruction>(*I);
120124
// Ignore Input dependencies.
121125
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
122126
continue;
123127
// Track Output, Flow, and Anti dependencies.
124128
if (auto D = DI->depends(Src, Dst, true)) {
125129
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
126-
// If the direction vector is negative, normalize it to
127-
// make it non-negative.
128-
if (D->normalize(SE))
129-
LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
130130
LLVM_DEBUG(StringRef DepType =
131131
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
132132
dbgs() << "Found " << DepType
@@ -438,7 +438,7 @@ struct LoopInterchange {
438438
CharMatrix DependencyMatrix;
439439
Loop *OuterMostLoop = *(LoopList.begin());
440440
if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
441-
OuterMostLoop, DI, SE)) {
441+
OuterMostLoop, DI, SE, LI)) {
442442
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
443443
return false;
444444
}

llvm/test/Transforms/LoopInterchange/interchange-s231.ll

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@
1111
;; aa[j][i] = aa[j - 1][i] + bb[j][i];
1212

1313
; CHECK: Processing InnerLoopId = 2 and OuterLoopId = 1
14-
; CHECK: Not interchanging loops. Cannot prove legality.
14+
; CHECK: Loops interchanged.
1515

1616
define float @s231() {
1717
entry:

llvm/test/Transforms/LoopInterchange/pr56275.ll

Lines changed: 5 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -21,20 +21,14 @@ target triple = "aarch64-unknown-linux-gnu"
2121
define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias noundef %c) {
2222
; CHECK-LABEL: @test1(
2323
; CHECK-NEXT: entry:
24-
; CHECK-NEXT: br label [[LOOP2_HEADER_PREHEADER:%.*]]
25-
; CHECK: loop1.header.preheader:
2624
; CHECK-NEXT: br label [[LOOP1_HEADER:%.*]]
2725
; CHECK: loop1.header:
28-
; CHECK-NEXT: [[I2:%.*]] = phi i64 [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ], [ 1, [[LOOP1_HEADER_PREHEADER:%.*]] ]
26+
; CHECK-NEXT: [[I2:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[I2_INC:%.*]], [[LOOP1_LATCH:%.*]] ]
2927
; CHECK-NEXT: [[I2_ST:%.*]] = add i64 [[I2]], 1
3028
; CHECK-NEXT: [[I2_LD:%.*]] = add i64 [[I2]], 0
31-
; CHECK-NEXT: br label [[LOOP2_HEADER_SPLIT1:%.*]]
32-
; CHECK: loop2.header.preheader:
3329
; CHECK-NEXT: br label [[LOOP2_HEADER:%.*]]
3430
; CHECK: loop2.header:
35-
; CHECK-NEXT: [[I1:%.*]] = phi i64 [ [[TMP0:%.*]], [[LOOP2_HEADER_SPLIT:%.*]] ], [ 1, [[LOOP2_HEADER_PREHEADER]] ]
36-
; CHECK-NEXT: br label [[LOOP1_HEADER_PREHEADER]]
37-
; CHECK: loop2.header.split1:
31+
; CHECK-NEXT: [[I1:%.*]] = phi i64 [ 1, [[LOOP1_HEADER]] ], [ [[I1_INC:%.*]], [[LOOP2_HEADER]] ]
3832
; CHECK-NEXT: [[I1_ST:%.*]] = add i64 [[I1]], 0
3933
; CHECK-NEXT: [[I1_LD:%.*]] = add i64 [[I1]], 0
4034
; CHECK-NEXT: [[A_ST:%.*]] = getelementptr inbounds [64 x i32], ptr [[A:%.*]], i64 [[I1_ST]], i64 [[I2_ST]]
@@ -45,17 +39,13 @@ define void @test1(ptr noalias noundef %a, ptr noalias noundef %b, ptr noalias n
4539
; CHECK-NEXT: store i32 [[B_VAL]], ptr [[A_ST]], align 4
4640
; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A_LD]], align 4
4741
; CHECK-NEXT: store i32 [[A_VAL]], ptr [[C_ST]], align 4
48-
; CHECK-NEXT: [[I1_INC:%.*]] = add nuw nsw i64 [[I1]], 1
42+
; CHECK-NEXT: [[I1_INC]] = add nuw nsw i64 [[I1]], 1
4943
; CHECK-NEXT: [[LOOP2_EXITCOND_NOT:%.*]] = icmp eq i64 [[I1_INC]], 63
50-
; CHECK-NEXT: br label [[LOOP1_LATCH]]
51-
; CHECK: loop2.header.split:
52-
; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[I1]], 1
53-
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 63
54-
; CHECK-NEXT: br i1 [[TMP1]], label [[EXIT:%.*]], label [[LOOP2_HEADER]]
44+
; CHECK-NEXT: br i1 [[LOOP2_EXITCOND_NOT]], label [[LOOP1_LATCH]], label [[LOOP2_HEADER]]
5545
; CHECK: loop1.latch:
5646
; CHECK-NEXT: [[I2_INC]] = add nuw nsw i64 [[I2]], 1
5747
; CHECK-NEXT: [[LOOP1_EXITCOND_NOT:%.*]] = icmp eq i64 [[I2_INC]], 63
58-
; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[LOOP2_HEADER_SPLIT]], label [[LOOP1_HEADER]]
48+
; CHECK-NEXT: br i1 [[LOOP1_EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP1_HEADER]]
5949
; CHECK: exit:
6050
; CHECK-NEXT: ret void
6151
;

0 commit comments

Comments
 (0)