Skip to content

Commit 6da3076

Browse files
committed
[LoopFusion] Detecting legal dependencies for fusion using DA info
Loop fusion pass will use the information provided by the recent DA patch to fuse additional legal loops, including those with forward loop-carried dependencies.
1 parent 2bbc740 commit 6da3076

File tree

3 files changed

+253
-16
lines changed

3 files changed

+253
-16
lines changed

llvm/lib/Transforms/Scalar/LoopFuse.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ STATISTIC(OnlySecondCandidateIsGuarded,
100100
"The second candidate is guarded while the first one is not");
101101
STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
102102
STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
103+
STATISTIC(NumDA, "DA checks passed");
103104

104105
enum FusionDependenceAnalysisChoice {
105106
FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -1371,6 +1372,47 @@ struct LoopFuser {
13711372
<< "\n");
13721373
}
13731374
#endif
1375+
unsigned Levels = DepResult->getLevels();
1376+
unsigned SameSDLevels = DepResult->getSameSDLevels();
1377+
unsigned CurLoopLevel = FC0.L->getLoopDepth();
1378+
1379+
// Check if DA is missing info regarding the current loop level
1380+
if (CurLoopLevel > Levels + SameSDLevels)
1381+
return false;
1382+
1383+
// Iterating over the outer levels.
1384+
for (unsigned Level = 1; Level <= std::min(CurLoopLevel - 1, Levels);
1385+
++Level) {
1386+
unsigned Direction = DepResult->getDirection(Level, false);
1387+
1388+
// Check if the direction vector does not include equality. If an outer
1389+
// loop has a non-equal direction, outer indices are different and it
1390+
// is safe to fuse.
1391+
if (!(Direction & Dependence::DVEntry::EQ)) {
1392+
LLVM_DEBUG(dbgs() << "Safe to fuse due to non-equal acceses in the "
1393+
"outer loops\n");
1394+
NumDA++;
1395+
return true;
1396+
}
1397+
}
1398+
1399+
assert(CurLoopLevel > Levels && "Fusion candidates are not separated");
1400+
1401+
unsigned CurDir = DepResult->getDirection(CurLoopLevel, true);
1402+
1403+
// Check if the direction vector does not include greater direction. In
1404+
// that case, the dependency is not backward loop-carried and it is legal
1405+
// to fuse. For example here we have a forward dependency
1406+
// for (int i = 0; i < n; i++)
1407+
// A[i] = ...;
1408+
// for (int i = 0; i < n; i++)
1409+
// ... = A[i-1];
1410+
if (!(CurDir & Dependence::DVEntry::GT)) {
1411+
LLVM_DEBUG(dbgs() << "Safe to fuse with no backward loop-carried "
1412+
"dependency\n");
1413+
NumDA++;
1414+
return true;
1415+
}
13741416

13751417
if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
13761418
LLVM_DEBUG(
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
4+
; STAT: 2 loop-fusion - DA checks passed
5+
6+
; The two inner loops have no dependency and are allowed to be fused because,
7+
; in the outer loops, different indices are accessed (i vs. i + 3, j vs. j + 2).
8+
9+
; C Code
10+
;
11+
;; for (long int i = 0; i < n; i++) {
12+
;; for (long int j = 0; j < n; j++) {
13+
;; for (long int k = 0; k < n; k++)
14+
;; A[i][j][k] = i;
15+
;; for (long int k = 0; k < n; k++)
16+
;; temp = A[i + 3][j + 2][k + 1];
17+
;; }
18+
;; }
19+
20+
define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
21+
entry:
22+
%cmp10 = icmp sgt i64 %n, 0
23+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
24+
25+
for.cond1.preheader.preheader: ; preds = %entry
26+
br label %for.cond1.preheader
27+
28+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
29+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
30+
%cmp26 = icmp sgt i64 %n, 0
31+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
32+
33+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
34+
br label %for.cond4.preheader
35+
36+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
37+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
38+
%cmp51 = icmp sgt i64 %n, 0
39+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
40+
41+
for.body6.preheader: ; preds = %for.cond4.preheader
42+
br label %for.body6
43+
44+
for.body6: ; preds = %for.body6.preheader, %for.body6
45+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
46+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
47+
store i64 %i.011, ptr %arrayidx8, align 8
48+
%inc = add nsw i64 %k.02, 1
49+
%exitcond13 = icmp ne i64 %inc, %n
50+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
51+
52+
for.cond10.loopexit.loopexit: ; preds = %for.body6
53+
br label %for.cond10.loopexit
54+
55+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
56+
%cmp113 = icmp sgt i64 %n, 0
57+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
58+
59+
for.body12.preheader: ; preds = %for.cond10.loopexit
60+
br label %for.body12
61+
62+
for.body12: ; preds = %for.body12.preheader, %for.body12
63+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
64+
%add = add nsw i64 %k9.05, 1
65+
%add13 = add nsw i64 %j.07, 2
66+
%add14 = add nsw i64 %i.011, 3
67+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
68+
%0 = load i64, ptr %arrayidx17, align 8
69+
%inc19 = add nsw i64 %k9.05, 1
70+
%exitcond = icmp ne i64 %inc19, %n
71+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
72+
73+
for.inc21.loopexit: ; preds = %for.body12
74+
br label %for.inc21
75+
76+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
77+
%inc22 = add nsw i64 %j.07, 1
78+
%exitcond14 = icmp ne i64 %inc22, %n
79+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
80+
81+
for.inc24.loopexit: ; preds = %for.inc21
82+
br label %for.inc24
83+
84+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
85+
%inc25 = add nsw i64 %i.011, 1
86+
%exitcond15 = icmp ne i64 %inc25, %n
87+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
88+
89+
for.end26.loopexit: ; preds = %for.inc24
90+
br label %for.end26
91+
92+
for.end26: ; preds = %for.end26.loopexit, %entry
93+
ret void
94+
}
95+
96+
; The two inner loops have a forward loop-carried dependency, allowing them
97+
; to be fused.
98+
99+
; C Code
100+
;
101+
;; for (long int i = 0; i < n; i++) {
102+
;; for (long int j = 0; j < n; j++) {
103+
;; for (long int k = 0; k < n; k++)
104+
;; A[i][j][k] = i;
105+
;; for (long int k = 0; k < n; k++)
106+
;; temp = A[i][j][k - 1];
107+
;; }
108+
;; }
109+
110+
define void @forward_dep(i64 %n, ptr %A) nounwind uwtable ssp {
111+
entry:
112+
%cmp10 = icmp sgt i64 %n, 0
113+
br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
114+
115+
for.cond1.preheader.preheader: ; preds = %entry
116+
br label %for.cond1.preheader
117+
118+
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc24
119+
%i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
120+
%cmp26 = icmp sgt i64 %n, 0
121+
br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
122+
123+
for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
124+
br label %for.cond4.preheader
125+
126+
for.cond4.preheader: ; preds = %for.cond4.preheader.preheader, %for.inc21
127+
%j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
128+
%cmp51 = icmp sgt i64 %n, 0
129+
br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
130+
131+
for.body6.preheader: ; preds = %for.cond4.preheader
132+
br label %for.body6
133+
134+
for.body6: ; preds = %for.body6.preheader, %for.body6
135+
%k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
136+
%arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
137+
store i64 %i.011, ptr %arrayidx8, align 8
138+
%inc = add nsw i64 %k.02, 1
139+
%exitcond13 = icmp ne i64 %inc, %n
140+
br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
141+
142+
for.cond10.loopexit.loopexit: ; preds = %for.body6
143+
br label %for.cond10.loopexit
144+
145+
for.cond10.loopexit: ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
146+
%cmp113 = icmp sgt i64 %n, 0
147+
br i1 %cmp113, label %for.body12.preheader, label %for.inc21
148+
149+
for.body12.preheader: ; preds = %for.cond10.loopexit
150+
br label %for.body12
151+
152+
for.body12: ; preds = %for.body12.preheader, %for.body12
153+
%k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
154+
%add = add nsw i64 %k9.05, -1
155+
%arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
156+
%0 = load i64, ptr %arrayidx17, align 8
157+
%inc19 = add nsw i64 %k9.05, 1
158+
%exitcond = icmp ne i64 %inc19, %n
159+
br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
160+
161+
for.inc21.loopexit: ; preds = %for.body12
162+
br label %for.inc21
163+
164+
for.inc21: ; preds = %for.inc21.loopexit, %for.cond10.loopexit
165+
%inc22 = add nsw i64 %j.07, 1
166+
%exitcond14 = icmp ne i64 %inc22, %n
167+
br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
168+
169+
for.inc24.loopexit: ; preds = %for.inc21
170+
br label %for.inc24
171+
172+
for.inc24: ; preds = %for.inc24.loopexit, %for.cond1.preheader
173+
%inc25 = add nsw i64 %i.011, 1
174+
%exitcond15 = icmp ne i64 %inc25, %n
175+
br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
176+
177+
for.end26.loopexit: ; preds = %for.inc24
178+
br label %for.end26
179+
180+
for.end26: ; preds = %for.end26.loopexit, %entry
181+
ret void
182+
}

llvm/test/Transforms/LoopFusion/simple.ll

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -298,42 +298,55 @@ bb23: ; preds = %bb17, %bb
298298
ret void
299299
}
300300

301+
; The following IR represents the C code shown below. With PR
302+
; #146383, loop fusion is able to utilize the information from dependence
303+
; analysis, enabling the loops in the function to be fused.
304+
;
305+
; void forward_dep(int *arg) {
306+
; for (int i = 0; i < 100; i++) {
307+
; int tmp = i - 3;
308+
; int val = tmp * (i + 3) % i;
309+
; arg[i] = val;
310+
; }
311+
;
312+
; for (int j = 0; j < 100; j++) {
313+
; int val = arg[j - 3];
314+
; arg[j] = val * 3;
315+
; }
316+
; }
317+
;
301318
define void @forward_dep(ptr noalias %arg) {
302319
; CHECK-LABEL: @forward_dep(
303-
; CHECK-NEXT: bb:
304-
; CHECK-NEXT: br label [[BB7:%.*]]
320+
; CHECK-NEXT: [[BB:.*]]:
321+
; CHECK-NEXT: br label %[[BB7:.*]]
305322
; CHECK: bb7:
306-
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
307-
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
323+
; CHECK-NEXT: [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
324+
; CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
325+
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
308326
; CHECK-NEXT: [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
309327
; CHECK-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
310328
; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
311329
; CHECK-NEXT: [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
312330
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
313331
; CHECK-NEXT: [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
314-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
332+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV22]]
315333
; CHECK-NEXT: store i32 [[TMP12]], ptr [[TMP13]], align 4
316-
; CHECK-NEXT: br label [[BB14]]
334+
; CHECK-NEXT: br label %[[BB14:.*]]
317335
; CHECK: bb14:
318-
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
319-
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
320-
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
321-
; CHECK-NEXT: br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
322-
; CHECK: bb19.preheader:
323-
; CHECK-NEXT: br label [[BB19:%.*]]
324-
; CHECK: bb19:
325-
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
326336
; CHECK-NEXT: [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
327337
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
328338
; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
329339
; CHECK-NEXT: [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
330340
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
331341
; CHECK-NEXT: store i32 [[TMP23]], ptr [[TMP24]], align 4
332-
; CHECK-NEXT: br label [[BB25]]
342+
; CHECK-NEXT: br label %[[BB25]]
333343
; CHECK: bb25:
344+
; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
345+
; CHECK-NEXT: [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
346+
; CHECK-NEXT: [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
334347
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
335348
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
336-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
349+
; CHECK-NEXT: br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
337350
; CHECK: bb26:
338351
; CHECK-NEXT: ret void
339352
;

0 commit comments

Comments
 (0)