Skip to content

Commit ccc96e6

Browse files
committed
[LV] Add tests where vector trip count is known equal to VFxUF.
Add tests covering the case where the original (scalar) trip count is not equal to VFxUF, but the vector trip count is.
1 parent e650c4b commit ccc96e6

File tree

2 files changed

+277
-0
lines changed

2 files changed

+277
-0
lines changed

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,3 +281,147 @@ exit:
281281
%res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ]
282282
ret i64 %res
283283
}
284+
285+
; Early-exit loop over %A: returns 0 as soon as a zero byte is loaded (exit
; from loop.header), or 1 once the latch has counted 17 iterations.
; The CHECK lines below expect the vector loop to stop at index 16 and the
; scalar loop to resume from 16 for the remaining iteration; with the VF8UF2
; and VF16UF1 prefixes the vector induction step is also 16. In all three
; expected outputs the vector body still branches back to itself.
; NOTE(review): the VF8UF1/VF8UF2/VF16UF1 prefixes presumably correspond to
; RUN lines forcing those vectorization/interleave factors - the RUN lines are
; not visible in this hunk; confirm.
define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosync nofree {
286+
; VF8UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
287+
; VF8UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
288+
; VF8UF1-NEXT: [[ENTRY:.*]]:
289+
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
290+
; VF8UF1: [[VECTOR_PH]]:
291+
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
292+
; VF8UF1: [[VECTOR_BODY]]:
293+
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
294+
; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
295+
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
296+
; VF8UF1-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
297+
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
298+
; VF8UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP1]])
299+
; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
300+
; VF8UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
301+
; VF8UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
302+
; VF8UF1: [[MIDDLE_SPLIT]]:
303+
; VF8UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
304+
; VF8UF1: [[MIDDLE_BLOCK]]:
305+
; VF8UF1-NEXT: br label %[[SCALAR_PH]]
306+
; VF8UF1: [[VECTOR_EARLY_EXIT]]:
307+
; VF8UF1-NEXT: br label %[[EXIT:.*]]
308+
; VF8UF1: [[SCALAR_PH]]:
309+
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
310+
; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
311+
; VF8UF1: [[LOOP_HEADER]]:
312+
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
313+
; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
314+
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
315+
; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
316+
; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
317+
; VF8UF1: [[LOOP_LATCH]]:
318+
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
319+
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
320+
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
321+
; VF8UF1: [[EXIT]]:
322+
; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
323+
; VF8UF1-NEXT: ret i8 [[RES]]
324+
;
325+
; VF8UF2-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
326+
; VF8UF2-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
327+
; VF8UF2-NEXT: [[ENTRY:.*]]:
328+
; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
329+
; VF8UF2: [[VECTOR_PH]]:
330+
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
331+
; VF8UF2: [[VECTOR_BODY]]:
332+
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
333+
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
334+
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
335+
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
336+
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
337+
; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
338+
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
339+
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
340+
; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], [[TMP3]]
341+
; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
342+
; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
343+
; VF8UF2-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
344+
; VF8UF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
345+
; VF8UF2: [[MIDDLE_SPLIT]]:
346+
; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
347+
; VF8UF2: [[MIDDLE_BLOCK]]:
348+
; VF8UF2-NEXT: br label %[[SCALAR_PH]]
349+
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
350+
; VF8UF2-NEXT: br label %[[EXIT:.*]]
351+
; VF8UF2: [[SCALAR_PH]]:
352+
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
353+
; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
354+
; VF8UF2: [[LOOP_HEADER]]:
355+
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
356+
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
357+
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
358+
; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
359+
; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
360+
; VF8UF2: [[LOOP_LATCH]]:
361+
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
362+
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
363+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
364+
; VF8UF2: [[EXIT]]:
365+
; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
366+
; VF8UF2-NEXT: ret i8 [[RES]]
367+
;
368+
; VF16UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
369+
; VF16UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
370+
; VF16UF1-NEXT: [[ENTRY:.*]]:
371+
; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
372+
; VF16UF1: [[VECTOR_PH]]:
373+
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
374+
; VF16UF1: [[VECTOR_BODY]]:
375+
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
376+
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
377+
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
378+
; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
379+
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
380+
; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
381+
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
382+
; VF16UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
383+
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
384+
; VF16UF1: [[MIDDLE_SPLIT]]:
385+
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
386+
; VF16UF1: [[MIDDLE_BLOCK]]:
387+
; VF16UF1-NEXT: br label %[[SCALAR_PH]]
388+
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
389+
; VF16UF1-NEXT: br label %[[EXIT:.*]]
390+
; VF16UF1: [[SCALAR_PH]]:
391+
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
392+
; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
393+
; VF16UF1: [[LOOP_HEADER]]:
394+
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
395+
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
396+
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
397+
; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
398+
; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
399+
; VF16UF1: [[LOOP_LATCH]]:
400+
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
401+
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
402+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
403+
; VF16UF1: [[EXIT]]:
404+
; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
405+
; VF16UF1-NEXT: ret i8 [[RES]]
406+
;
407+
entry:
408+
br label %loop.header
409+
410+
loop.header:
411+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
412+
%p.src = getelementptr inbounds i8, ptr %A, i64 %iv
413+
%l = load i8, ptr %p.src, align 1
414+
; Early exit: leave the loop with result 0 when the loaded byte is zero.
%c = icmp eq i8 %l, 0
415+
br i1 %c, label %exit, label %loop.latch
416+
417+
loop.latch:
418+
%iv.next = add nsw i64 %iv, 1
419+
; Latch exit: leave the loop with result 1 once %iv.next reaches 17.
%cmp = icmp eq i64 %iv.next, 17
420+
br i1 %cmp, label %exit, label %loop.header
421+
422+
exit:
423+
; 0 when arriving from the early exit in loop.header, 1 when arriving from the latch.
%res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
424+
ret i8 %res
425+
}
426+
427+

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1218,6 +1218,133 @@ exit:
12181218
ret void
12191219
}
12201220

1221+
; Single-exit loop that adds 10 to each of 17 consecutive bytes starting at
; %A, advancing a pointer induction (%p.src) alongside the counter %iv.
; The CHECK lines below expect the vector loop to stop at index 16 and the
; scalar loop to resume from index 16 / pointer %A + 16 for the remaining
; iteration; with the VF8UF2 and VF16UF1 prefixes the vector induction step is
; also 16. In all three expected outputs the vector body still branches back
; to itself.
; NOTE(review): the VF8UF1/VF8UF2/VF16UF1 prefixes presumably correspond to
; RUN lines forcing those vectorization/interleave factors - the RUN lines are
; not visible in this hunk; confirm.
define void @test_vector_tc_eq_16(ptr %A) {
1222+
; VF8UF1-LABEL: define void @test_vector_tc_eq_16(
1223+
; VF8UF1-SAME: ptr [[A:%.*]]) {
1224+
; VF8UF1-NEXT: [[ENTRY:.*]]:
1225+
; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1226+
; VF8UF1: [[VECTOR_PH]]:
1227+
; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1228+
; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
1229+
; VF8UF1: [[VECTOR_BODY]]:
1230+
; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1231+
; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1232+
; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
1233+
; VF8UF1-NEXT: [[TMP1:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
1234+
; VF8UF1-NEXT: store <8 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
1235+
; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1236+
; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1237+
; VF8UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1238+
; VF8UF1: [[MIDDLE_BLOCK]]:
1239+
; VF8UF1-NEXT: br label %[[SCALAR_PH]]
1240+
; VF8UF1: [[SCALAR_PH]]:
1241+
; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1242+
; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1243+
; VF8UF1-NEXT: br label %[[LOOP:.*]]
1244+
; VF8UF1: [[LOOP]]:
1245+
; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1246+
; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1247+
; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1248+
; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1249+
; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1250+
; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1251+
; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1252+
; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1253+
; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
1254+
; VF8UF1: [[EXIT]]:
1255+
; VF8UF1-NEXT: ret void
1256+
;
1257+
; VF8UF2-LABEL: define void @test_vector_tc_eq_16(
1258+
; VF8UF2-SAME: ptr [[A:%.*]]) {
1259+
; VF8UF2-NEXT: [[ENTRY:.*]]:
1260+
; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1261+
; VF8UF2: [[VECTOR_PH]]:
1262+
; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1263+
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
1264+
; VF8UF2: [[VECTOR_BODY]]:
1265+
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1266+
; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1267+
; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
1268+
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
1269+
; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
1270+
; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
1271+
; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
1272+
; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
1273+
; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
1274+
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1275+
; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1276+
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1277+
; VF8UF2: [[MIDDLE_BLOCK]]:
1278+
; VF8UF2-NEXT: br label %[[SCALAR_PH]]
1279+
; VF8UF2: [[SCALAR_PH]]:
1280+
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1281+
; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1282+
; VF8UF2-NEXT: br label %[[LOOP:.*]]
1283+
; VF8UF2: [[LOOP]]:
1284+
; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1285+
; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1286+
; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1287+
; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1288+
; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1289+
; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1290+
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1291+
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1292+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1293+
; VF8UF2: [[EXIT]]:
1294+
; VF8UF2-NEXT: ret void
1295+
;
1296+
; VF16UF1-LABEL: define void @test_vector_tc_eq_16(
1297+
; VF16UF1-SAME: ptr [[A:%.*]]) {
1298+
; VF16UF1-NEXT: [[ENTRY:.*]]:
1299+
; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1300+
; VF16UF1: [[VECTOR_PH]]:
1301+
; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1302+
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
1303+
; VF16UF1: [[VECTOR_BODY]]:
1304+
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1305+
; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1306+
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
1307+
; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
1308+
; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
1309+
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1310+
; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1311+
; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1312+
; VF16UF1: [[MIDDLE_BLOCK]]:
1313+
; VF16UF1-NEXT: br label %[[SCALAR_PH]]
1314+
; VF16UF1: [[SCALAR_PH]]:
1315+
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1316+
; VF16UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1317+
; VF16UF1-NEXT: br label %[[LOOP:.*]]
1318+
; VF16UF1: [[LOOP]]:
1319+
; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1320+
; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1321+
; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1322+
; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1323+
; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1324+
; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1325+
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1326+
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1327+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1328+
; VF16UF1: [[EXIT]]:
1329+
; VF16UF1-NEXT: ret void
1330+
;
1331+
entry:
1332+
br label %loop
1333+
1334+
loop:
1335+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
1336+
; Pointer induction: starts at %A and advances by one byte per iteration.
%p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
1337+
%p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
1338+
%l = load i8, ptr %p.src, align 1
1339+
%add = add nsw i8 %l, 10
1340+
store i8 %add, ptr %p.src
1341+
%iv.next = add nsw i64 %iv, 1
1342+
; Leave the loop once %iv.next reaches 17, i.e. after 17 iterations.
%cmp = icmp eq i64 %iv.next, 17
1343+
br i1 %cmp, label %exit, label %loop
1344+
1345+
exit:
1346+
ret void
1347+
}
12211348
;.
12221349
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
12231350
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1227,18 +1354,24 @@ exit:
12271354
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
12281355
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
12291356
; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1357+
; VF8UF1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1358+
; VF8UF1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
12301359
;.
12311360
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
12321361
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
12331362
; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
12341363
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
12351364
; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
12361365
; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1366+
; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1367+
; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
12371368
;.
12381369
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
12391370
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
12401371
; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
12411372
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
12421373
; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
12431374
; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1375+
; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1376+
; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
12441377
;.

0 commit comments

Comments
 (0)