@@ -29,8 +29,7 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
2929; COST-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP6]], align 1
3030; COST-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 -12, i64 -12, i64 -12, i64 -12>
3131; COST-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
32- ; COST-NEXT: [[TMP9:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
33- ; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP9]]
32+ ; COST-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP7]], [[TMP8]]
3433; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
3534; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3635; COST-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
@@ -89,10 +88,8 @@ define void @switch_default_to_latch_common_dest(ptr %start, ptr %end) {
8988; FORCED-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 -12, i64 -12, i64 -12, i64 -12>
9089; FORCED-NEXT: [[TMP11:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD]], <i64 13, i64 13, i64 13, i64 13>
9190; FORCED-NEXT: [[TMP12:%.*]] = icmp eq <4 x i64> [[WIDE_LOAD4]], <i64 13, i64 13, i64 13, i64 13>
92- ; FORCED-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
93- ; FORCED-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
94- ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP13]], [[TMP13]]
95- ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP14]], [[TMP14]]
91+ ; FORCED-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP9]], [[TMP11]]
92+ ; FORCED-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP10]], [[TMP12]]
9693; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
9794; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
9895; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -734,16 +731,10 @@ define void @switch_multiple_common_dests(ptr %start, ptr %end) {
734731; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP28]], [[TMP36]]
735732; FORCED-NEXT: [[TMP39:%.*]] = xor <4 x i1> [[TMP37]], <i1 true, i1 true, i1 true, i1 true>
736733; FORCED-NEXT: [[TMP40:%.*]] = xor <4 x i1> [[TMP38]], <i1 true, i1 true, i1 true, i1 true>
737- ; FORCED-NEXT: [[TMP29:%.*]] = or <4 x i1> [[TMP35]], [[TMP35]]
738- ; FORCED-NEXT: [[TMP30:%.*]] = or <4 x i1> [[TMP36]], [[TMP36]]
739- ; FORCED-NEXT: [[TMP31:%.*]] = or <4 x i1> [[TMP29]], [[TMP35]]
740- ; FORCED-NEXT: [[TMP32:%.*]] = or <4 x i1> [[TMP30]], [[TMP36]]
741- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP31]])
742- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP32]])
743- ; FORCED-NEXT: [[TMP33:%.*]] = or <4 x i1> [[TMP27]], [[TMP27]]
744- ; FORCED-NEXT: [[TMP34:%.*]] = or <4 x i1> [[TMP28]], [[TMP28]]
745- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP33]])
746- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP34]])
734+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP7]], i32 1, <4 x i1> [[TMP35]])
735+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP36]])
736+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP27]])
737+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP28]])
747738; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP39]])
748739; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP40]])
749740; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -884,10 +875,8 @@ define void @switch4_default_common_dest_with_case(ptr %start, ptr %end) {
884875; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP12]])
885876; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
886877; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
887- ; FORCED-NEXT: [[TMP22:%.*]] = or <4 x i1> [[TMP20]], [[TMP20]]
888- ; FORCED-NEXT: [[TMP23:%.*]] = or <4 x i1> [[TMP21]], [[TMP21]]
889- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP22]])
890- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP23]])
878+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP20]])
879+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP21]])
891880; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
892881; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
893882; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
@@ -991,8 +980,7 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
991980; COST-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP7]], <4 x i1> [[TMP13]], <4 x i1> zeroinitializer
992981; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP6]], i32 1, <4 x i1> [[TMP11]])
993982; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP10]])
994- ; COST-NEXT: [[TMP15:%.*]] = or <4 x i1> [[TMP14]], [[TMP14]]
995- ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP15]])
983+ ; COST-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP6]], i32 1, <4 x i1> [[TMP14]])
996984; COST-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
997985; COST-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
998986; COST-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -1078,10 +1066,8 @@ define void @switch_under_br_default_common_dest_with_case(ptr %start, ptr %end,
10781066; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> zeroinitializer, ptr [[TMP8]], i32 1, <4 x i1> [[TMP26]])
10791067; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP15]])
10801068; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP16]])
1081- ; FORCED-NEXT: [[TMP17:%.*]] = or <4 x i1> [[TMP23]], [[TMP23]]
1082- ; FORCED-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP24]], [[TMP24]]
1083- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP17]])
1084- ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP18]])
1069+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP23]])
1070+ ; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP24]])
10851071; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
10861072; FORCED-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
10871073; FORCED-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
@@ -1238,10 +1224,8 @@ define void @br_under_switch_default_common_dest_with_case(ptr %start, ptr %end,
12381224; FORCED-NEXT: [[TMP33:%.*]] = select <4 x i1> [[TMP10]], <4 x i1> [[TMP18]], <4 x i1> zeroinitializer
12391225; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP32]])
12401226; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP33]])
1241- ; FORCED-NEXT: [[TMP34:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
1242- ; FORCED-NEXT: [[TMP35:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
1243- ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP34]], [[TMP15]]
1244- ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP35]], [[TMP16]]
1227+ ; FORCED-NEXT: [[TMP36:%.*]] = or <4 x i1> [[TMP32]], [[TMP15]]
1228+ ; FORCED-NEXT: [[TMP37:%.*]] = or <4 x i1> [[TMP33]], [[TMP16]]
12451229; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP36]])
12461230; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 2, i64 2, i64 2, i64 2>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP37]])
12471231; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
@@ -1405,24 +1389,8 @@ define void @large_number_of_cases(ptr %start, ptr %end) {
14051389; FORCED-NEXT: [[TMP38:%.*]] = or <4 x i1> [[TMP36]], [[TMP22]]
14061390; FORCED-NEXT: [[TMP39:%.*]] = or <4 x i1> [[TMP37]], [[TMP23]]
14071391; FORCED-NEXT: [[TMP40:%.*]] = or <4 x i1> [[TMP38]], [[TMP24]]
1408- ; FORCED-NEXT: [[TMP41:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
1409- ; FORCED-NEXT: [[TMP42:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
1410- ; FORCED-NEXT: [[TMP43:%.*]] = or <4 x i1> [[TMP41]], [[TMP41]]
1411- ; FORCED-NEXT: [[TMP44:%.*]] = or <4 x i1> [[TMP42]], [[TMP42]]
1412- ; FORCED-NEXT: [[TMP45:%.*]] = or <4 x i1> [[TMP43]], [[TMP41]]
1413- ; FORCED-NEXT: [[TMP46:%.*]] = or <4 x i1> [[TMP44]], [[TMP42]]
1414- ; FORCED-NEXT: [[TMP47:%.*]] = or <4 x i1> [[TMP45]], [[TMP41]]
1415- ; FORCED-NEXT: [[TMP48:%.*]] = or <4 x i1> [[TMP46]], [[TMP42]]
1416- ; FORCED-NEXT: [[TMP49:%.*]] = or <4 x i1> [[TMP47]], [[TMP41]]
1417- ; FORCED-NEXT: [[TMP50:%.*]] = or <4 x i1> [[TMP48]], [[TMP42]]
1418- ; FORCED-NEXT: [[TMP51:%.*]] = or <4 x i1> [[TMP49]], [[TMP41]]
1419- ; FORCED-NEXT: [[TMP52:%.*]] = or <4 x i1> [[TMP50]], [[TMP42]]
1420- ; FORCED-NEXT: [[TMP53:%.*]] = or <4 x i1> [[TMP51]], [[TMP41]]
1421- ; FORCED-NEXT: [[TMP54:%.*]] = or <4 x i1> [[TMP52]], [[TMP42]]
1422- ; FORCED-NEXT: [[TMP55:%.*]] = or <4 x i1> [[TMP53]], [[TMP41]]
1423- ; FORCED-NEXT: [[TMP56:%.*]] = or <4 x i1> [[TMP54]], [[TMP42]]
1424- ; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP55]], [[TMP41]]
1425- ; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP56]], [[TMP42]]
1392+ ; FORCED-NEXT: [[TMP57:%.*]] = or <4 x i1> [[TMP39]], [[TMP25]]
1393+ ; FORCED-NEXT: [[TMP58:%.*]] = or <4 x i1> [[TMP40]], [[TMP26]]
14261394; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP7]], i32 1, <4 x i1> [[TMP57]])
14271395; FORCED-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> <i64 42, i64 42, i64 42, i64 42>, ptr [[TMP8]], i32 1, <4 x i1> [[TMP58]])
14281396; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
0 commit comments