@@ -288,9 +288,46 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
288288; COST-LABEL: define void @switch_all_dests_distinct(
289289; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
290290; COST-NEXT: [[ENTRY:.*]]:
291+ ; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
292+ ; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
293+ ; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
294+ ; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
295+ ; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
296+ ; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
297+ ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
298+ ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
299+ ; COST: [[VECTOR_PH]]:
300+ ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
301+ ; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
302+ ; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
303+ ; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
304+ ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
305+ ; COST: [[VECTOR_BODY]]:
306+ ; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
307+ ; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
308+ ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
309+ ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
310+ ; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
311+ ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
312+ ; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
313+ ; COST-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
314+ ; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]]
315+ ; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
316+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 1), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP8]])
317+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
318+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP6]])
319+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
320+ ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
321+ ; COST-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
322+ ; COST-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
323+ ; COST: [[MIDDLE_BLOCK]]:
324+ ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
325+ ; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
326+ ; COST: [[SCALAR_PH]]:
327+ ; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
291328; COST-NEXT: br label %[[LOOP_HEADER:.*]]
292329; COST: [[LOOP_HEADER]]:
293- ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START ]], %[[ENTRY ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
330+ ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
294331; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
295332; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
296333; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -312,7 +349,7 @@ define void @switch_all_dests_distinct(ptr %start, ptr %end) {
312349; COST: [[LOOP_LATCH]]:
313350; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
314351; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
315- ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.* ]], label %[[LOOP_HEADER]]
352+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+ ]]
316353; COST: [[EXIT]]:
317354; COST-NEXT: ret void
318355;
@@ -469,7 +506,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
469506; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
470507; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
471508; COST-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
472- ; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
509+ ; COST-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
473510; COST: [[MIDDLE_BLOCK]]:
474511; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
475512; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -502,7 +539,7 @@ define void @switch_all_dests_distinct_variant_using_branches(ptr %start, ptr %e
502539; COST: [[LOOP_LATCH]]:
503540; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
504541; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
505- ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7 :![0-9]+]]
542+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9 :![0-9]+]]
506543; COST: [[EXIT]]:
507544; COST-NEXT: ret void
508545;
@@ -639,9 +676,49 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
639676; COST-LABEL: define void @switch_multiple_common_dests(
640677; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
641678; COST-NEXT: [[ENTRY:.*]]:
679+ ; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
680+ ; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
681+ ; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
682+ ; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
683+ ; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
684+ ; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
685+ ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
686+ ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
687+ ; COST: [[VECTOR_PH]]:
688+ ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
689+ ; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
690+ ; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
691+ ; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
692+ ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
693+ ; COST: [[VECTOR_BODY]]:
694+ ; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
695+ ; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
696+ ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
697+ ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
698+ ; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
699+ ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], zeroinitializer
700+ ; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
701+ ; COST-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 14)
702+ ; COST-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 15)
703+ ; COST-NEXT: [[TMP11:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
704+ ; COST-NEXT: [[TMP12:%.*]] = or <4 x i1> [[TMP8]], [[TMP9]]
705+ ; COST-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP12]], [[TMP10]]
706+ ; COST-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP11]], [[TMP13]]
707+ ; COST-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[TMP14]], splat (i1 true)
708+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
709+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP11]])
710+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
711+ ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
712+ ; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
713+ ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
714+ ; COST: [[MIDDLE_BLOCK]]:
715+ ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
716+ ; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
717+ ; COST: [[SCALAR_PH]]:
718+ ; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
642719; COST-NEXT: br label %[[LOOP_HEADER:.*]]
643720; COST: [[LOOP_HEADER]]:
644- ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START ]], %[[ENTRY ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
721+ ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
645722; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
646723; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
647724; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -662,7 +739,7 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
662739; COST: [[LOOP_LATCH]]:
663740; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
664741; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
665- ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.* ]], label %[[LOOP_HEADER]]
742+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+ ]]
666743; COST: [[EXIT]]:
667744; COST-NEXT: ret void
668745;
@@ -790,9 +867,43 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
790867; COST-LABEL: define void @switch4_default_common_dest_with_case(
791868; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]]) #[[ATTR0]] {
792869; COST-NEXT: [[ENTRY:.*]]:
870+ ; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
871+ ; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
872+ ; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
873+ ; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
874+ ; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
875+ ; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
876+ ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
877+ ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
878+ ; COST: [[VECTOR_PH]]:
879+ ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
880+ ; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
881+ ; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
882+ ; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
883+ ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
884+ ; COST: [[VECTOR_BODY]]:
885+ ; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
886+ ; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
887+ ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
888+ ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
889+ ; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
890+ ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
891+ ; COST-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
892+ ; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
893+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP7]])
894+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP6]])
895+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP9]])
896+ ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
897+ ; COST-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
898+ ; COST-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
899+ ; COST: [[MIDDLE_BLOCK]]:
900+ ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
901+ ; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
902+ ; COST: [[SCALAR_PH]]:
903+ ; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
793904; COST-NEXT: br label %[[LOOP_HEADER:.*]]
794905; COST: [[LOOP_HEADER]]:
795- ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START ]], %[[ENTRY ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
906+ ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
796907; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
797908; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
798909; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -811,7 +922,7 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
811922; COST: [[LOOP_LATCH]]:
812923; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
813924; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
814- ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.* ]], label %[[LOOP_HEADER]]
925+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP13:![0-9]+ ]]
815926; COST: [[EXIT]]:
816927; COST-NEXT: ret void
817928;
@@ -957,7 +1068,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
9571068; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
9581069; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
9591070; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
960- ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
1071+ ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14 :![0-9]+]]
9611072; COST: [[MIDDLE_BLOCK]]:
9621073; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
9631074; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -987,7 +1098,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
9871098; COST: [[LOOP_LATCH]]:
9881099; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
9891100; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
990- ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9 :![0-9]+]]
1101+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP15 :![0-9]+]]
9911102; COST: [[EXIT]]:
9921103; COST-NEXT: ret void
9931104;
@@ -1116,9 +1227,51 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
11161227; COST-LABEL: define void @br_under_switch_default_common_dest_with_case(
11171228; COST-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[X:%.*]]) #[[ATTR0]] {
11181229; COST-NEXT: [[ENTRY:.*]]:
1230+ ; COST-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1231+ ; COST-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1232+ ; COST-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -8
1233+ ; COST-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]]
1234+ ; COST-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 3
1235+ ; COST-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1236+ ; COST-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
1237+ ; COST-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1238+ ; COST: [[VECTOR_PH]]:
1239+ ; COST-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
1240+ ; COST-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1241+ ; COST-NEXT: [[TMP4:%.*]] = mul i64 [[N_VEC]], 8
1242+ ; COST-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP4]]
1243+ ; COST-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[X]], i64 0
1244+ ; COST-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1245+ ; COST-NEXT: br label %[[VECTOR_BODY:.*]]
1246+ ; COST: [[VECTOR_BODY]]:
1247+ ; COST-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1248+ ; COST-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1249+ ; COST-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[OFFSET_IDX]]
1250+ ; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[NEXT_GEP]], align 1
1251+ ; COST-NEXT: [[TMP6:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 -12)
1252+ ; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], splat (i64 13)
1253+ ; COST-NEXT: [[TMP8:%.*]] = or <4 x i1> [[TMP6]], [[TMP7]]
1254+ ; COST-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], splat (i1 true)
1255+ ; COST-NEXT: [[TMP10:%.*]] = icmp ule <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
1256+ ; COST-NEXT: [[TMP11:%.*]] = xor <4 x i1> [[TMP10]], splat (i1 true)
1257+ ; COST-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> [[TMP11]], <4 x i1> zeroinitializer
1258+ ; COST-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP12]], [[TMP7]]
1259+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP13]])
1260+ ; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP6]], <4 x i1> [[TMP10]], <4 x i1> zeroinitializer
1261+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 42), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP14]])
1262+ ; COST-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP9]]
1263+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> splat (i64 2), ptr [[NEXT_GEP]], i32 1, <4 x i1> [[TMP15]])
1264+ ; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1265+ ; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1266+ ; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1267+ ; COST: [[MIDDLE_BLOCK]]:
1268+ ; COST-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1269+ ; COST-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
1270+ ; COST: [[SCALAR_PH]]:
1271+ ; COST-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
11191272; COST-NEXT: br label %[[LOOP_HEADER:.*]]
11201273; COST: [[LOOP_HEADER]]:
1121- ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START ]], %[[ENTRY ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
1274+ ; COST-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
11221275; COST-NEXT: [[L:%.*]] = load i64, ptr [[PTR_IV]], align 1
11231276; COST-NEXT: switch i64 [[L]], label %[[DEFAULT:.*]] [
11241277; COST-NEXT: i64 -12, label %[[IF_THEN_1:.*]]
@@ -1140,7 +1293,7 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
11401293; COST: [[LOOP_LATCH]]:
11411294; COST-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i64, ptr [[PTR_IV]], i64 1
11421295; COST-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]]
1143- ; COST-NEXT: br i1 [[EC]], label %[[EXIT:.* ]], label %[[LOOP_HEADER]]
1296+ ; COST-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP17:![0-9]+ ]]
11441297; COST: [[EXIT]]:
11451298; COST-NEXT: ret void
11461299;
@@ -1433,6 +1586,14 @@ exit:
14331586; COST: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
14341587; COST: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
14351588; COST: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1589+ ; COST: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
1590+ ; COST: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
1591+ ; COST: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
1592+ ; COST: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
1593+ ; COST: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
1594+ ; COST: [[LOOP15]] = distinct !{[[LOOP15]], [[META2]], [[META1]]}
1595+ ; COST: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
1596+ ; COST: [[LOOP17]] = distinct !{[[LOOP17]], [[META2]], [[META1]]}
14361597;.
14371598; FORCED: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
14381599; FORCED: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
0 commit comments