1
+ ; REQUIRES: asserts
2
+
3
+ ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
4
+ ; STAT: 2 loop-fusion - DA checks passed
5
+
6
+ ; The two inner loops have no dependency and are allowed to be fused because,
7
+ ; in the outer-loop dimensions, they access different levels (i vs. i + 3, j vs. j + 2).
8
+
9
+ ; C Code
10
+ ;
11
+ ;; for (long int i = 0; i < n; i++) {
12
+ ;; for (long int j = 0; j < n; j++) {
13
+ ;; for (long int k = 0; k < n; k++)
14
+ ;; A[i][j][k] = i;
15
+ ;; for (long int k = 0; k < n; k++)
16
+ ;; temp = A[i + 3][j + 2][k + 1];
17
+ ;; }
18
+ ;; }
19
+
20
+ define void @nonequal_outer_access (i64 %n , ptr %A ) nounwind uwtable ssp { ; i/j/k loop nest; the j-loop body holds two sibling k-loops (for.body6 stores, for.body12 loads)
21
+ entry:
22
+ %cmp10 = icmp sgt i64 %n , 0 ; enter the i-loop only when n > 0
23
+ br i1 %cmp10 , label %for.cond1.preheader.preheader , label %for.end26
24
+
25
+ for.cond1.preheader.preheader: ; preds = %entry
26
+ br label %for.cond1.preheader
27
+
28
+ for.cond1.preheader: ; i-loop header; preds = %for.cond1.preheader.preheader, %for.inc24
29
+ %i.011 = phi i64 [ %inc25 , %for.inc24 ], [ 0 , %for.cond1.preheader.preheader ] ; i: 0 on entry, %inc25 on the back edge
30
+ %cmp26 = icmp sgt i64 %n , 0 ; enter the j-loop only when n > 0
31
+ br i1 %cmp26 , label %for.cond4.preheader.preheader , label %for.inc24
32
+
33
+ for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
34
+ br label %for.cond4.preheader
35
+
36
+ for.cond4.preheader: ; j-loop header; preds = %for.cond4.preheader.preheader, %for.inc21
37
+ %j.07 = phi i64 [ %inc22 , %for.inc21 ], [ 0 , %for.cond4.preheader.preheader ] ; j: 0 on entry, %inc22 on the back edge
38
+ %cmp51 = icmp sgt i64 %n , 0 ; enter the first k-loop only when n > 0
39
+ br i1 %cmp51 , label %for.body6.preheader , label %for.cond10.loopexit
40
+
41
+ for.body6.preheader: ; preds = %for.cond4.preheader
42
+ br label %for.body6
43
+
44
+ for.body6: ; first k-loop (single-block body); preds = %for.body6.preheader, %for.body6
45
+ %k.02 = phi i64 [ %inc , %for.body6 ], [ 0 , %for.body6.preheader ] ; k: 0 on entry, %inc on the back edge
46
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64 ]], ptr %A , i64 %i.011 , i64 %j.07 , i64 %k.02 ; &A[i][j][k]
47
+ store i64 %i.011 , ptr %arrayidx8 , align 8 ; A[i][j][k] = i
48
+ %inc = add nsw i64 %k.02 , 1 ; k + 1
49
+ %exitcond13 = icmp ne i64 %inc , %n ; keep looping while k + 1 != n
50
+ br i1 %exitcond13 , label %for.body6 , label %for.cond10.loopexit.loopexit
51
+
52
+ for.cond10.loopexit.loopexit: ; preds = %for.body6
53
+ br label %for.cond10.loopexit
54
+
55
+ for.cond10.loopexit: ; join point between the two k-loops; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
56
+ %cmp113 = icmp sgt i64 %n , 0 ; enter the second k-loop only when n > 0
57
+ br i1 %cmp113 , label %for.body12.preheader , label %for.inc21
58
+
59
+ for.body12.preheader: ; preds = %for.cond10.loopexit
60
+ br label %for.body12
61
+
62
+ for.body12: ; second k-loop (single-block body); preds = %for.body12.preheader, %for.body12
63
+ %k9.05 = phi i64 [ %inc19 , %for.body12 ], [ 0 , %for.body12.preheader ] ; k: 0 on entry, %inc19 on the back edge
64
+ %add = add nsw i64 %k9.05 , 1 ; k + 1 (innermost subscript of the load)
65
+ %add13 = add nsw i64 %j.07 , 2 ; j + 2 (middle subscript of the load)
66
+ %add14 = add nsw i64 %i.011 , 3 ; i + 3 (outermost subscript of the load)
67
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64 ]], ptr %A , i64 %add14 , i64 %add13 , i64 %add ; &A[i + 3][j + 2][k + 1]
68
+ %0 = load i64 , ptr %arrayidx17 , align 8 ; temp = A[i + 3][j + 2][k + 1]; %0 has no further uses in this function
69
+ %inc19 = add nsw i64 %k9.05 , 1 ; k + 1 (induction update)
70
+ %exitcond = icmp ne i64 %inc19 , %n ; keep looping while k + 1 != n
71
+ br i1 %exitcond , label %for.body12 , label %for.inc21.loopexit
72
+
73
+ for.inc21.loopexit: ; preds = %for.body12
74
+ br label %for.inc21
75
+
76
+ for.inc21: ; j-loop latch; preds = %for.inc21.loopexit, %for.cond10.loopexit
77
+ %inc22 = add nsw i64 %j.07 , 1 ; j + 1
78
+ %exitcond14 = icmp ne i64 %inc22 , %n ; keep looping while j + 1 != n
79
+ br i1 %exitcond14 , label %for.cond4.preheader , label %for.inc24.loopexit
80
+
81
+ for.inc24.loopexit: ; preds = %for.inc21
82
+ br label %for.inc24
83
+
84
+ for.inc24: ; i-loop latch; preds = %for.inc24.loopexit, %for.cond1.preheader
85
+ %inc25 = add nsw i64 %i.011 , 1 ; i + 1
86
+ %exitcond15 = icmp ne i64 %inc25 , %n ; keep looping while i + 1 != n
87
+ br i1 %exitcond15 , label %for.cond1.preheader , label %for.end26.loopexit
88
+
89
+ for.end26.loopexit: ; preds = %for.inc24
90
+ br label %for.end26
91
+
92
+ for.end26: ; function exit; preds = %for.end26.loopexit, %entry
93
+ ret void
94
+ }
95
+
96
+ ; The two inner loops have a forward loop-carried dependency, allowing them
97
+ ; to be fused.
98
+
99
+ ; C Code
100
+ ;
101
+ ;; for (long int i = 0; i < n; i++) {
102
+ ;; for (long int j = 0; j < n; j++) {
103
+ ;; for (long int k = 0; k < n; k++)
104
+ ;; A[i][j][k] = i;
105
+ ;; for (long int k = 0; k < n; k++)
106
+ ;; temp = A[i][j][k - 1];
107
+ ;; }
108
+ ;; }
109
+
110
+ define void @forward_dep (i64 %n , ptr %A ) nounwind uwtable ssp { ; i/j/k loop nest; the j-loop body holds two sibling k-loops (for.body6 stores A[i][j][k], for.body12 loads A[i][j][k - 1])
111
+ entry:
112
+ %cmp10 = icmp sgt i64 %n , 0 ; enter the i-loop only when n > 0
113
+ br i1 %cmp10 , label %for.cond1.preheader.preheader , label %for.end26
114
+
115
+ for.cond1.preheader.preheader: ; preds = %entry
116
+ br label %for.cond1.preheader
117
+
118
+ for.cond1.preheader: ; i-loop header; preds = %for.cond1.preheader.preheader, %for.inc24
119
+ %i.011 = phi i64 [ %inc25 , %for.inc24 ], [ 0 , %for.cond1.preheader.preheader ] ; i: 0 on entry, %inc25 on the back edge
120
+ %cmp26 = icmp sgt i64 %n , 0 ; enter the j-loop only when n > 0
121
+ br i1 %cmp26 , label %for.cond4.preheader.preheader , label %for.inc24
122
+
123
+ for.cond4.preheader.preheader: ; preds = %for.cond1.preheader
124
+ br label %for.cond4.preheader
125
+
126
+ for.cond4.preheader: ; j-loop header; preds = %for.cond4.preheader.preheader, %for.inc21
127
+ %j.07 = phi i64 [ %inc22 , %for.inc21 ], [ 0 , %for.cond4.preheader.preheader ] ; j: 0 on entry, %inc22 on the back edge
128
+ %cmp51 = icmp sgt i64 %n , 0 ; enter the first k-loop only when n > 0
129
+ br i1 %cmp51 , label %for.body6.preheader , label %for.cond10.loopexit
130
+
131
+ for.body6.preheader: ; preds = %for.cond4.preheader
132
+ br label %for.body6
133
+
134
+ for.body6: ; first k-loop (single-block body); preds = %for.body6.preheader, %for.body6
135
+ %k.02 = phi i64 [ %inc , %for.body6 ], [ 0 , %for.body6.preheader ] ; k: 0 on entry, %inc on the back edge
136
+ %arrayidx8 = getelementptr inbounds [100 x [100 x i64 ]], ptr %A , i64 %i.011 , i64 %j.07 , i64 %k.02 ; &A[i][j][k]
137
+ store i64 %i.011 , ptr %arrayidx8 , align 8 ; A[i][j][k] = i
138
+ %inc = add nsw i64 %k.02 , 1 ; k + 1
139
+ %exitcond13 = icmp ne i64 %inc , %n ; keep looping while k + 1 != n
140
+ br i1 %exitcond13 , label %for.body6 , label %for.cond10.loopexit.loopexit
141
+
142
+ for.cond10.loopexit.loopexit: ; preds = %for.body6
143
+ br label %for.cond10.loopexit
144
+
145
+ for.cond10.loopexit: ; join point between the two k-loops; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
146
+ %cmp113 = icmp sgt i64 %n , 0 ; enter the second k-loop only when n > 0
147
+ br i1 %cmp113 , label %for.body12.preheader , label %for.inc21
148
+
149
+ for.body12.preheader: ; preds = %for.cond10.loopexit
150
+ br label %for.body12
151
+
152
+ for.body12: ; second k-loop (single-block body); preds = %for.body12.preheader, %for.body12
153
+ %k9.05 = phi i64 [ %inc19 , %for.body12 ], [ 0 , %for.body12.preheader ] ; k: 0 on entry, %inc19 on the back edge
154
+ %add = add nsw i64 %k9.05 , -1 ; k - 1 (innermost subscript of the load; -1 on the first iteration)
155
+ %arrayidx17 = getelementptr inbounds [100 x [100 x i64 ]], ptr %A , i64 %i.011 , i64 %j.07 , i64 %add ; &A[i][j][k - 1], same i and j as the store in for.body6
156
+ %0 = load i64 , ptr %arrayidx17 , align 8 ; temp = A[i][j][k - 1]; %0 has no further uses in this function
157
+ %inc19 = add nsw i64 %k9.05 , 1 ; k + 1 (induction update)
158
+ %exitcond = icmp ne i64 %inc19 , %n ; keep looping while k + 1 != n
159
+ br i1 %exitcond , label %for.body12 , label %for.inc21.loopexit
160
+
161
+ for.inc21.loopexit: ; preds = %for.body12
162
+ br label %for.inc21
163
+
164
+ for.inc21: ; j-loop latch; preds = %for.inc21.loopexit, %for.cond10.loopexit
165
+ %inc22 = add nsw i64 %j.07 , 1 ; j + 1
166
+ %exitcond14 = icmp ne i64 %inc22 , %n ; keep looping while j + 1 != n
167
+ br i1 %exitcond14 , label %for.cond4.preheader , label %for.inc24.loopexit
168
+
169
+ for.inc24.loopexit: ; preds = %for.inc21
170
+ br label %for.inc24
171
+
172
+ for.inc24: ; i-loop latch; preds = %for.inc24.loopexit, %for.cond1.preheader
173
+ %inc25 = add nsw i64 %i.011 , 1 ; i + 1
174
+ %exitcond15 = icmp ne i64 %inc25 , %n ; keep looping while i + 1 != n
175
+ br i1 %exitcond15 , label %for.cond1.preheader , label %for.end26.loopexit
176
+
177
+ for.end26.loopexit: ; preds = %for.inc24
178
+ br label %for.end26
179
+
180
+ for.end26: ; function exit; preds = %for.end26.loopexit, %entry
181
+ ret void
182
+ }
0 commit comments