Skip to content

Commit 582c09a

Browse files
committed
update tests
1 parent 90ce698 commit 582c09a

File tree

2 files changed

+256
-17
lines changed

2 files changed

+256
-17
lines changed

llvm/test/Transforms/LoopVectorize/X86/predicate-switch.ll

Lines changed: 173 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,46 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
288288
; COST-LABEL: define void @switch_all_dests_distinct(
289289
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
290290
; COST-NEXT: [[ENTRY:.*]]:
291+
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
292+
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
293+
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
294+
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
295+
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
296+
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
297+
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
298+
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
299+
; COST: [[VECTOR_PH]]:
300+
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
301+
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
302+
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
303+
; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
304+
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
305+
; COST: [[VECTOR_BODY]]:
306+
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
307+
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
308+
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
309+
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
310+
; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
311+
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
312+
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
313+
; COST-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
314+
; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
315+
; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
316+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP8]])
317+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
318+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP6]])
319+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
320+
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
321+
; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322+
; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
323+
; COST: [[MIDDLE_BLOCK]]:
324+
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
325+
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
326+
; COST: [[SCALAR_PH]]:
327+
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
291328
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
292329
; COST: [[LOOP_HEADER]]:
293-
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
330+
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
294331
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
295332
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
296333
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -312,7 +349,7 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
312349
; COST: [[LOOP_LATCH]]:
313350
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
314351
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
315-
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
352+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
316353
; COST: [[EXIT]]:
317354
; COST-NEXT: ret void
318355
;
@@ -469,7 +506,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
469506
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
470507
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
471508
; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
472-
; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
509+
; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
473510
; COST: [[MIDDLE_BLOCK]]:
474511
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
475512
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -502,7 +539,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
502539
; COST: [[LOOP_LATCH]]:
503540
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
504541
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
505-
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
542+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
506543
; COST: [[EXIT]]:
507544
; COST-NEXT: ret void
508545
;
@@ -639,9 +676,49 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
639676
; COST-LABEL: define void @switch_multiple_common_dests(
640677
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
641678
; COST-NEXT: [[ENTRY:.*]]:
679+
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
680+
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
681+
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
682+
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
683+
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
684+
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
685+
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
686+
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
687+
; COST: [[VECTOR_PH]]:
688+
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
689+
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
690+
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
691+
; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
692+
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
693+
; COST: [[VECTOR_BODY]]:
694+
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
695+
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
696+
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
697+
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
698+
; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
699+
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
700+
; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
701+
; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
702+
; COST-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
703+
; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
704+
; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP9]]
705+
; COST-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP12]], [[TMP10]]
706+
; COST-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP11]], [[TMP13]]
707+
; COST-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
708+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
709+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
710+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
711+
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
712+
; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
713+
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
714+
; COST: [[MIDDLE_BLOCK]]:
715+
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
716+
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
717+
; COST: [[SCALAR_PH]]:
718+
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
642719
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
643720
; COST: [[LOOP_HEADER]]:
644-
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
721+
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
645722
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
646723
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
647724
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -662,7 +739,7 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
662739
; COST: [[LOOP_LATCH]]:
663740
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
664741
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
665-
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
742+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
666743
; COST: [[EXIT]]:
667744
; COST-NEXT: ret void
668745
;
@@ -790,9 +867,43 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
790867
; COST-LABEL: define void @switch4_default_common_dest_with_case(
791868
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
792869
; COST-NEXT: [[ENTRY:.*]]:
870+
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
871+
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
872+
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
873+
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
874+
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
875+
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
876+
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
877+
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
878+
; COST: [[VECTOR_PH]]:
879+
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
880+
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
881+
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
882+
; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
883+
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
884+
; COST: [[VECTOR_BODY]]:
885+
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
886+
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
887+
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
888+
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
889+
; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
890+
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
891+
; COST-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
892+
; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
893+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
894+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP6]])
895+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP9]])
896+
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
897+
; COST-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
898+
; COST-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
899+
; COST: [[MIDDLE_BLOCK]]:
900+
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
901+
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
902+
; COST: [[SCALAR_PH]]:
903+
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
793904
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
794905
; COST: [[LOOP_HEADER]]:
795-
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
906+
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
796907
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
797908
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
798909
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -811,7 +922,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
811922
; COST: [[LOOP_LATCH]]:
812923
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
813924
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
814-
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
925+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+]]
815926
; COST: [[EXIT]]:
816927
; COST-NEXT: ret void
817928
;
@@ -957,7 +1068,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
9571068
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
9581069
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
9591070
; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
960-
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1071+
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
9611072
; COST: [[MIDDLE_BLOCK]]:
9621073
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
9631074
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -987,7 +1098,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
9871098
; COST: [[LOOP_LATCH]]:
9881099
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
9891100
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
990-
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
1101+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]]
9911102
; COST: [[EXIT]]:
9921103
; COST-NEXT: ret void
9931104
;
@@ -1116,9 +1227,51 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
11161227
; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
11171228
; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
11181229
; COST-NEXT: [[ENTRY:.*]]:
1230+
; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1231+
; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1232+
; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
1233+
; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1234+
; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
1235+
; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1236+
; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
1237+
; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1238+
; COST: [[VECTOR_PH]]:
1239+
; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
1240+
; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1241+
; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
1242+
; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
1243+
; COST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
1244+
; COST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1245+
; COST-NEXT: br label %[[VECTOR_BODY:.*]]
1246+
; COST: [[VECTOR_BODY]]:
1247+
; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1248+
; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1249+
; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
1250+
; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
1251+
; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
1252+
; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
1253+
; COST-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
1254+
; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
1255+
; COST-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1256+
; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
1257+
; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
1258+
; COST-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP12]], [[TMP7]]
1259+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
1260+
; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> [[TMP10]], <4 x i1> zeroinitializer
1261+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
1262+
; COST-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP9]]
1263+
; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
1264+
; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1265+
; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1266+
; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1267+
; COST: [[MIDDLE_BLOCK]]:
1268+
; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1269+
; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1270+
; COST: [[SCALAR_PH]]:
1271+
; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
11191272
; COST-NEXT: br label %[[LOOP_HEADER:.*]]
11201273
; COST: [[LOOP_HEADER]]:
1121-
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1274+
; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
11221275
; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
11231276
; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
11241277
; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -1140,7 +1293,7 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
11401293
; COST: [[LOOP_LATCH]]:
11411294
; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
11421295
; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1143-
; COST-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP_HEADER]]
1296+
; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+]]
11441297
; COST: [[EXIT]]:
11451298
; COST-NEXT: ret void
11461299
;
@@ -1433,6 +1586,14 @@ exit:
14331586
; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
14341587
; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
14351588
; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1589+
; COST: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1590+
; COST: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1591+
; COST: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1592+
; COST: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1593+
; COST: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1594+
; COST: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1595+
; COST: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1596+
; COST: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
14361597
;.
14371598
; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
14381599
; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}

0 commit comments

Comments
 (0)