1+ ; REQUIRES: asserts
2+
3+ ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
4+ ; STAT: 2 loop-fusion - DA checks passed
5+
6+ ; The two inner loops have no dependency and are allowed to be fused as in the
7+ ; outer loops, different levels are accessed to.
8+
9+ ; C Code
10+ ;
11+ ;; for (long int i = 0; i < n; i++) {
12+ ;; for (long int j = 0; j < n; j++) {
13+ ;; for (long int k = 0; k < n; k++)
14+ ;; A[i][j][k] = i;
15+ ;; for (long int k = 0; k < n; k++)
16+ ;; temp = A[i + 3][j + 2][k + 1];
17+ ;; }
18+ ;; }
19+
; @nonequal_outer_access(%n, %A)
;
; Three-deep loop nest over i, j and k. Each induction variable starts at 0
; and its loop exits once the incremented value equals %n; every loop is
; guarded by a `%n > 0` check. The body of the j-loop holds two sibling
; k-loops:
;   for.body6  - stores %i.011 to A[i][j][k]
;   for.body12 - loads from A[i + 3][j + 2][k + 1] (the loaded value is unused)
; %A is addressed as an array of [100 x [100 x i64]] elements.
define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
  %cmp10 = icmp sgt i64 %n, 0
  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

for.cond1.preheader.preheader:                    ; preds = %entry
  br label %for.cond1.preheader

; i-loop header.
for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
  %cmp26 = icmp sgt i64 %n, 0
  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; j-loop header; also the guard of the first k-loop.
for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
  %cmp51 = icmp sgt i64 %n, 0
  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader:                              ; preds = %for.cond4.preheader
  br label %for.body6

; First k-loop: A[i][j][k] = i.
for.body6:                                        ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
  store i64 %i.011, ptr %arrayidx8, align 8
  %inc = add nsw i64 %k.02, 1
  %exitcond13 = icmp ne i64 %inc, %n
  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit:                     ; preds = %for.body6
  br label %for.cond10.loopexit

; Guard of the second k-loop.
for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
  %cmp113 = icmp sgt i64 %n, 0
  br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader:                             ; preds = %for.cond10.loopexit
  br label %for.body12

; Second k-loop: temp = A[i + 3][j + 2][k + 1].
for.body12:                                       ; preds = %for.body12.preheader, %for.body12
  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
  %add = add nsw i64 %k9.05, 1
  %add13 = add nsw i64 %j.07, 2
  %add14 = add nsw i64 %i.011, 3
  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
  %0 = load i64, ptr %arrayidx17, align 8
  %inc19 = add nsw i64 %k9.05, 1
  %exitcond = icmp ne i64 %inc19, %n
  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit:                               ; preds = %for.body12
  br label %for.inc21

; j-loop latch.
for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
  %inc22 = add nsw i64 %j.07, 1
  %exitcond14 = icmp ne i64 %inc22, %n
  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit:                               ; preds = %for.inc21
  br label %for.inc24

; i-loop latch.
for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
  %inc25 = add nsw i64 %i.011, 1
  %exitcond15 = icmp ne i64 %inc25, %n
  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit:                               ; preds = %for.inc24
  br label %for.end26

for.end26:                                        ; preds = %for.end26.loopexit, %entry
  ret void
}
95+
96+ ; The two inner loops have a forward loop-carried dependency, allowing them
97+ ; to be fused.
98+
99+ ; C Code
100+ ;
101+ ;; for (long int i = 0; i < n; i++) {
102+ ;; for (long int j = 0; j < n; j++) {
103+ ;; for (long int k = 0; k < n; k++)
104+ ;; A[i][j][k] = i;
105+ ;; for (long int k = 0; k < n; k++)
106+ ;; temp = A[i][j][k - 1];
107+ ;; }
108+ ;; }
109+
; @forward_dep(%n, %A)
;
; Three-deep loop nest over i, j and k. Each induction variable starts at 0
; and its loop exits once the incremented value equals %n; every loop is
; guarded by a `%n > 0` check. The body of the j-loop holds two sibling
; k-loops:
;   for.body6  - stores %i.011 to A[i][j][k]
;   for.body12 - loads from A[i][j][k - 1] (the loaded value is unused)
; %A is addressed as an array of [100 x [100 x i64]] elements.
define void @forward_dep(i64 %n, ptr %A) nounwind uwtable ssp {
entry:
  %cmp10 = icmp sgt i64 %n, 0
  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26

for.cond1.preheader.preheader:                    ; preds = %entry
  br label %for.cond1.preheader

; i-loop header.
for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
  %cmp26 = icmp sgt i64 %n, 0
  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24

for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
  br label %for.cond4.preheader

; j-loop header; also the guard of the first k-loop.
for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
  %cmp51 = icmp sgt i64 %n, 0
  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit

for.body6.preheader:                              ; preds = %for.cond4.preheader
  br label %for.body6

; First k-loop: A[i][j][k] = i.
for.body6:                                        ; preds = %for.body6.preheader, %for.body6
  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
  store i64 %i.011, ptr %arrayidx8, align 8
  %inc = add nsw i64 %k.02, 1
  %exitcond13 = icmp ne i64 %inc, %n
  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit

for.cond10.loopexit.loopexit:                     ; preds = %for.body6
  br label %for.cond10.loopexit

; Guard of the second k-loop.
for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
  %cmp113 = icmp sgt i64 %n, 0
  br i1 %cmp113, label %for.body12.preheader, label %for.inc21

for.body12.preheader:                             ; preds = %for.cond10.loopexit
  br label %for.body12

; Second k-loop: temp = A[i][j][k - 1].
for.body12:                                       ; preds = %for.body12.preheader, %for.body12
  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
  %add = add nsw i64 %k9.05, -1
  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
  %0 = load i64, ptr %arrayidx17, align 8
  %inc19 = add nsw i64 %k9.05, 1
  %exitcond = icmp ne i64 %inc19, %n
  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit

for.inc21.loopexit:                               ; preds = %for.body12
  br label %for.inc21

; j-loop latch.
for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
  %inc22 = add nsw i64 %j.07, 1
  %exitcond14 = icmp ne i64 %inc22, %n
  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit

for.inc24.loopexit:                               ; preds = %for.inc21
  br label %for.inc24

; i-loop latch.
for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
  %inc25 = add nsw i64 %i.011, 1
  %exitcond15 = icmp ne i64 %inc25, %n
  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit

for.end26.loopexit:                               ; preds = %for.inc24
  br label %for.end26

for.end26:                                        ; preds = %for.end26.loopexit, %entry
  ret void
}
0 commit comments