Skip to content

Commit c21efda

Browse files
committed
Revise tests
1 parent 211be9e commit c21efda

File tree

1 file changed

+35
-36
lines changed

1 file changed

+35
-36
lines changed

llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
@A = dso_local global [256 x [256 x float]] zeroinitializer
66
@B = dso_local global [256 x [256 x float]] zeroinitializer
77
@C = dso_local global [256 x [256 x float]] zeroinitializer
8-
@D = dso_local global [256 x [256 x [256 x float]]] zeroinitializer
9-
@E = dso_local global [256 x [256 x [256 x float]]] zeroinitializer
8+
@D = global [256 x [256 x [256 x float]]] zeroinitializer
9+
@E = global [256 x [256 x [256 x float]]] zeroinitializer
1010

1111
; Check that the below loops are exchanged for vectorization.
1212
;
@@ -107,7 +107,8 @@ exit:
107107
; Check that the below loops are exchanged to allow innermost loop
108108
; vectorization. We cannot vectorize the j-loop because it has a lexically
109109
; backward dependency, but the i-loop can be vectorized because all the
110-
; loop-carried dependencies are lexically forward.
110+
; loop-carried dependencies are lexically forward. LoopVectorize currently only
111+
; vectorizes the innermost loop, hence move the i-loop to that position.
111112
;
112113
; for (int i = 0; i < 255; i++) {
113114
; for (int j = 1; j < 256; j++) {
@@ -129,50 +130,50 @@ entry:
129130

130131
for.i.header:
131132
%i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
132-
%i.inc = add nsw i64 %i, 1
133+
%i.inc = add i64 %i, 1
133134
br label %for.j.body
134135

135136
for.j.body:
136137
%j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
137-
%j.dec = add nsw i64 %j, -1
138-
%a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec
139-
%b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j
140-
%c.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i.inc, i64 %j
141-
%c.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, i64 %j
142-
%a = load float, ptr %a.load.index, align 4
143-
%b = load float, ptr %b.index, align 4
144-
%c0 = load float, ptr %c.load.index, align 4
145-
%c1 = load float, ptr %c.store.index, align 4
138+
%j.dec = add i64 %j, -1
139+
%a.load.index = getelementptr [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j.dec
140+
%b.index = getelementptr [256 x [256 x float]], ptr @B, i64 0, i64 %i, i64 %j
141+
%c.load.index = getelementptr [256 x [256 x float]], ptr @C, i64 0, i64 %i.inc, i64 %j
142+
%c.store.index = getelementptr [256 x [256 x float]], ptr @C, i64 0, i64 %i, i64 %j
143+
%a = load float, ptr %a.load.index
144+
%b = load float, ptr %b.index
145+
%c0 = load float, ptr %c.load.index
146+
%c1 = load float, ptr %c.store.index
146147
%add.0 = fadd float %a, %b
147-
%a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j
148-
store float %add.0, ptr %a.store.index, align 4
148+
%a.store.index = getelementptr [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j
149+
store float %add.0, ptr %a.store.index
149150
%add.1 = fadd float %c0, %c1
150-
store float %add.1, ptr %c.store.index, align 4
151-
%j.next = add nuw nsw i64 %j, 1
151+
store float %add.1, ptr %c.store.index
152+
%j.next = add i64 %j, 1
152153
%cmp.j = icmp eq i64 %j.next, 256
153154
br i1 %cmp.j, label %for.i.inc, label %for.j.body
154155

155156
for.i.inc:
156-
%i.next = add nuw nsw i64 %i, 1
157+
%i.next = add i64 %i, 1
157158
%cmp.i = icmp eq i64 %i.next, 255
158159
br i1 %cmp.i, label %exit, label %for.i.header
159160

160161
exit:
161162
ret void
162163
}
163164

164-
; Check that no interchange is performed for the following loop. The j-loop is
165-
; vectorizable because all the dependencies are lexically forward. However, at
166-
; the moment, we don't analyze an execution order between instructions in
167-
; different BBs, so fail to determine that the j-loop is vectorizable.
168-
; Therefore, no exchange is performed.
165+
; Check that no interchange is performed for the following loop. Interchanging
166+
; the j-loop and k-loop makes the innermost loop vectorizable, since the j-loop
167+
; has only forward dependencies. However, at the moment, a loop body consisting
168+
; of multiple BBs is handled pessimistically. Hence the j-loop isn't moved to
169+
; the innermost place.
169170
;
170171
; for (int i = 0; i < 255; i++) {
171172
; for (int j = 0; j < 255; j++) {
172173
; for (int k = 0; k < 128; k++) {
173174
; E[i][j][k] = D[i+1][j+1][2*k];
174175
; if (cond)
175-
; D[i][j][k+1] += 1.0;
176+
; D[i][j][k+1] = 1.0;
176177
; }
177178
; }
178179

@@ -194,30 +195,28 @@ entry:
194195

195196
for.i.header:
196197
%i = phi i64 [ 0, %entry ], [ %i.inc, %for.i.inc ]
197-
%i.inc = add nsw i64 %i, 1
198+
%i.inc = add i64 %i, 1
198199
br label %for.j.header
199200

200201
for.j.header:
201202
%j = phi i64 [ 0, %for.i.header ], [ %j.inc, %for.j.inc ]
202-
%j.inc = add nsw i64 %j, 1
203+
%j.inc = add i64 %j, 1
203204
br label %for.k.body
204205

205206
for.k.body:
206207
%k = phi i64 [ 0, %for.j.header ], [ %k.inc, %for.k.inc ]
207-
%k.inc = add nsw i64 %k, 1
208-
%k.2 = mul nsw i64 %k, 2
209-
%d.index = getelementptr nuw inbounds [256 x [256 x [256 x float]]], ptr @D, i64 %i.inc, i64 %j.inc, i64 %k.2
210-
%e.index = getelementptr nuw inbounds [256 x [256 x [256 x float]]], ptr @E, i64 %i, i64 %j, i64 %k
211-
%d.load = load float, ptr %d.index, align 4
212-
store float %d.load, ptr %e.index, align 4
208+
%k.inc = add i64 %k, 1
209+
%k.2 = mul i64 %k, 2
210+
%d.index = getelementptr [256 x [256 x [256 x float]]], ptr @D, i64 0, i64 %i.inc, i64 %j.inc, i64 %k.2
211+
%e.index = getelementptr [256 x [256 x [256 x float]]], ptr @E, i64 0, i64 %i, i64 %j, i64 %k
212+
%d.load = load float, ptr %d.index
213+
store float %d.load, ptr %e.index
213214
%cond = freeze i1 undef
214215
br i1 %cond, label %if.then, label %for.k.inc
215216

216217
if.then:
217-
%d.index2 = getelementptr nuw inbounds [256 x [256 x [256 x float]]], ptr @D, i64 %i, i64 %j, i64 %k.inc
218-
%d.load2 = load float, ptr %d.index2, align 4
219-
%add = fadd float %d.load2, 1.0
220-
store float %add, ptr %d.index2, align 4
218+
%d.index2 = getelementptr [256 x [256 x [256 x float]]], ptr @D, i64 0, i64 %i, i64 %j, i64 %k.inc
219+
store float 1.0, ptr %d.index2
221220
br label %for.k.inc
222221

223222
for.k.inc:

0 commit comments

Comments
 (0)