Skip to content

Commit 45ebde4

Browse files
1997alirezamahesh-attarde
authored and committed
[LoopFusion] Detecting legal dependencies for fusion using DA info (llvm#146383)
Loop fusion pass will use the information provided by the recent DA patch to fuse additional legal loops, including those with forward loop-carried dependencies.
1 parent 4984398 commit 45ebde4

File tree

3 files changed

+253
-16
lines changed

3 files changed

+253
-16
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ STATISTIC(OnlySecondCandidateIsGuarded,
100100
"The second candidate is guarded while the first one is not");
101101
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102102
// Statistic for sunk instructions; its description must say "sunk" so that
// -stats output is distinguishable from the NumHoistedInsts counter.
STATISTIC(NumSunkInsts, "Number of sunk preheader instructions.");
103+
STATISTIC(NumDA, "DA checks passed");
103104

104105
enum FusionDependenceAnalysisChoice {
105106
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -1371,6 +1372,47 @@ struct LoopFuser {
13711372
<< "\n");
13721373
}
13731374
#endif
1375+
unsigned Levels = DepResult->getLevels();
1376+
unsigned SameSDLevels = DepResult->getSameSDLevels();
1377+
unsigned CurLoopLevel = FC0.L->getLoopDepth();
1378+
1379+
// Check if DA is missing info regarding the current loop level
1380+
if (CurLoopLevel > Levels + SameSDLevels)
1381+
return false;
1382+
1383+
// Iterating over the outer levels.
1384+
for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels);
1385+
++Level) {
1386+
unsigned Direction = DepResult->getDirection(Level, false);
1387+
1388+
// Check if the direction vector does not include equality. If an outer
1389+
// loop has a non-equal direction, outer indices are different and it
1390+
// is safe to fuse.
1391+
if (!(Direction & Dependence::DVEntry::EQ)) {
1392+
LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "
1393+
"outer loops\n");
1394+
NumDA++;
1395+
return true;
1396+
}
1397+
}
1398+
1399+
assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
1400+
1401+
unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
1402+
1403+
// Check if the direction vector does not include the GT direction. In
1404+
// that case, the dependency is not backward loop-carried and it is legal
1405+
// to fuse. For example, here we have a forward dependency:
1406+
// for (int i = 0; i < n; i++)
1407+
// A[i] = ...;
1408+
// for (int i = 0; i < n; i++)
1409+
// ... = A[i-1];
1410+
if (!(CurDir & Dependence::DVEntry::GT)) {
1411+
LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "
1412+
"dependency\n");
1413+
NumDA++;
1414+
return true;
1415+
}
13741416

13751417
if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
13761418
LLVM_DEBUG(
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
4+
; STAT: 2 loop-fusion - DA checks passed
5+
6+
; The two inner loops have no dependency and can be fused because, in the
7+
; outer dimensions, the two accesses always use different indices.
8+
9+
; C Code
10+
;
11+
;; for (long int i = 0; i < n; i++) {
12+
;; for (long int j = 0; j < n; j++) {
13+
;; for (long int k = 0; k < n; k++)
14+
;; A[i][j][k] = i;
15+
;; for (long int k = 0; k < n; k++)
16+
;; temp = A[i + 3][j + 2][k + 1];
17+
;; }
18+
;; }
19+
20+
define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
21+
entry:
22+
%cmp10 = icmp sgt i64 %n, 0
23+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
24+
25+
for.cond1.preheader.preheader: ; preds = %entry
26+
br label %for.cond1.preheader
27+
28+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
29+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
30+
%cmp26 = icmp sgt i64 %n, 0
31+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
32+
33+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
34+
br label %for.cond4.preheader
35+
36+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
37+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
38+
%cmp51 = icmp sgt i64 %n, 0
39+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
40+
41+
for.body6.preheader: ; preds = %for.cond4.preheader
42+
br label %for.body6
43+
44+
for.body6: ; preds = %for.body6.preheader, %for.body6
45+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
46+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
47+
store i64 %i.011, ptr %arrayidx8, align 8
48+
%inc = add nsw i64 %k.02, 1
49+
%exitcond13 = icmp ne i64 %inc, %n
50+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
51+
52+
for.cond10.loopexit.loopexit: ; preds = %for.body6
53+
br label %for.cond10.loopexit
54+
55+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
56+
%cmp113 = icmp sgt i64 %n, 0
57+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
58+
59+
for.body12.preheader: ; preds = %for.cond10.loopexit
60+
br label %for.body12
61+
62+
for.body12: ; preds = %for.body12.preheader, %for.body12
63+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
64+
%add = add nsw i64 %k9.05, 1
65+
%add13 = add nsw i64 %j.07, 2
66+
%add14 = add nsw i64 %i.011, 3
67+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
68+
%0 = load i64, ptr %arrayidx17, align 8
69+
%inc19 = add nsw i64 %k9.05, 1
70+
%exitcond = icmp ne i64 %inc19, %n
71+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
72+
73+
for.inc21.loopexit: ; preds = %for.body12
74+
br label %for.inc21
75+
76+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
77+
%inc22 = add nsw i64 %j.07, 1
78+
%exitcond14 = icmp ne i64 %inc22, %n
79+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
80+
81+
for.inc24.loopexit: ; preds = %for.inc21
82+
br label %for.inc24
83+
84+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
85+
%inc25 = add nsw i64 %i.011, 1
86+
%exitcond15 = icmp ne i64 %inc25, %n
87+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
88+
89+
for.end26.loopexit: ; preds = %for.inc24
90+
br label %for.end26
91+
92+
for.end26: ; preds = %for.end26.loopexit, %entry
93+
ret void
94+
}
95+
96+
; The two inner loops have a forward loop-carried dependency, allowing them
97+
; to be fused.
98+
99+
; C Code
100+
;
101+
;; for (long int i = 0; i < n; i++) {
102+
;; for (long int j = 0; j < n; j++) {
103+
;; for (long int k = 0; k < n; k++)
104+
;; A[i][j][k] = i;
105+
;; for (long int k = 0; k < n; k++)
106+
;; temp = A[i][j][k - 1];
107+
;; }
108+
;; }
109+
110+
define void @forward_dep(i64 %n, ptr %A) nounwind uwtable ssp {
111+
entry:
112+
%cmp10 = icmp sgt i64 %n, 0
113+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
114+
115+
for.cond1.preheader.preheader: ; preds = %entry
116+
br label %for.cond1.preheader
117+
118+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
119+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
120+
%cmp26 = icmp sgt i64 %n, 0
121+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
122+
123+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
124+
br label %for.cond4.preheader
125+
126+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
127+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
128+
%cmp51 = icmp sgt i64 %n, 0
129+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
130+
131+
for.body6.preheader: ; preds = %for.cond4.preheader
132+
br label %for.body6
133+
134+
for.body6: ; preds = %for.body6.preheader, %for.body6
135+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
136+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
137+
store i64 %i.011, ptr %arrayidx8, align 8
138+
%inc = add nsw i64 %k.02, 1
139+
%exitcond13 = icmp ne i64 %inc, %n
140+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
141+
142+
for.cond10.loopexit.loopexit: ; preds = %for.body6
143+
br label %for.cond10.loopexit
144+
145+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
146+
%cmp113 = icmp sgt i64 %n, 0
147+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
148+
149+
for.body12.preheader: ; preds = %for.cond10.loopexit
150+
br label %for.body12
151+
152+
for.body12: ; preds = %for.body12.preheader, %for.body12
153+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
154+
%add = add nsw i64 %k9.05, -1
155+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
156+
%0 = load i64, ptr %arrayidx17, align 8
157+
%inc19 = add nsw i64 %k9.05, 1
158+
%exitcond = icmp ne i64 %inc19, %n
159+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
160+
161+
for.inc21.loopexit: ; preds = %for.body12
162+
br label %for.inc21
163+
164+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
165+
%inc22 = add nsw i64 %j.07, 1
166+
%exitcond14 = icmp ne i64 %inc22, %n
167+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
168+
169+
for.inc24.loopexit: ; preds = %for.inc21
170+
br label %for.inc24
171+
172+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
173+
%inc25 = add nsw i64 %i.011, 1
174+
%exitcond15 = icmp ne i64 %inc25, %n
175+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
176+
177+
for.end26.loopexit: ; preds = %for.inc24
178+
br label %for.end26
179+
180+
for.end26: ; preds = %for.end26.loopexit, %entry
181+
ret void
182+
}

llvm/test/Transforms/LoopFusion/simple.ll

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -298,42 +298,55 @@ bb23: ; preds = %bb17, %bb
298298
ret void
299299
}
300300

301+
; The following IR is a representation of the provided code below. With PR
302+
; #146383, loop fusion is able to utilize the information from dependence
303+
; analysis, enabling the loops in the function to be fused.
304+
;
305+
; void forward_dep(int *arg) {
306+
; for (int i = 0; i < 100; i++) {
307+
; int tmp = i - 3;
308+
; int val = tmp * (i + 3) % i;
309+
; arg[i] = val;
310+
; }
311+
;
312+
; for (int j = 0; j < 100; j++) {
313+
; int val = arg[j - 3];
314+
; arg[j] = val * 3;
315+
; }
316+
; }
317+
;
301318
define void @forward_dep(ptr noalias %arg) {
302319
; CHECK-LABEL: @forward_dep(
303-
; CHECK-NEXT: bb:
304-
; CHECK-NEXT: br label [[BB7:%.*]]
320+
; CHECK-NEXT: [[BB:.*]]:
321+
; CHECK-NEXT: br label %[[BB7:.*]]
305322
; CHECK: bb7:
306-
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
307-
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
323+
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
324+
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
325+
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
308326
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
309327
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
310328
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
311329
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
312330
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
313331
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
314-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
332+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
315333
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
316-
; CHECK-NEXT: br label [[BB14]]
334+
; CHECK-NEXT: br label %[[BB14:.*]]
317335
; CHECK: bb14:
318-
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
319-
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
320-
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
321-
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
322-
; CHECK: bb19.preheader:
323-
; CHECK-NEXT: br label [[BB19:%.*]]
324-
; CHECK: bb19:
325-
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
326336
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
327337
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
328338
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
329339
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
330340
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
331341
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
332-
; CHECK-NEXT: br label [[BB25]]
342+
; CHECK-NEXT: br label %[[BB25]]
333343
; CHECK: bb25:
344+
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
345+
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
346+
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
334347
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
335348
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
336-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
349+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
337350
; CHECK: bb26:
338351
; CHECK-NEXT: ret void
339352
;

0 commit comments

Comments
 (0)