@@ -833,13 +833,11 @@ define i32 @smax_i32_same(<4 x i32> %a, <4 x i32> %b) {
833833define float @nested_fadd_f32 (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
834834; CHECK-SD-LABEL: nested_fadd_f32:
835835; CHECK-SD: // %bb.0:
836- ; CHECK-SD-NEXT: faddp v1.4s, v1.4s, v1.4s
836+ ; CHECK-SD-NEXT: fadd v0.4s, v0.4s, v1.4s
837+ ; CHECK-SD-NEXT: fadd s2, s2, s3
837838; CHECK-SD-NEXT: faddp v0.4s, v0.4s, v0.4s
838- ; CHECK-SD-NEXT: faddp s1, v1.2s
839839; CHECK-SD-NEXT: faddp s0, v0.2s
840- ; CHECK-SD-NEXT: fadd s1, s1, s3
841840; CHECK-SD-NEXT: fadd s0, s0, s2
842- ; CHECK-SD-NEXT: fadd s0, s0, s1
843841; CHECK-SD-NEXT: ret
844842;
845843; CHECK-GI-LABEL: nested_fadd_f32:
@@ -905,15 +903,12 @@ define float @nested_fadd_f32_slow(<4 x float> %a, <4 x float> %b, float %c, flo
905903define float @nested_mul_f32 (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
906904; CHECK-SD-LABEL: nested_mul_f32:
907905; CHECK-SD: // %bb.0:
908- ; CHECK-SD-NEXT: ext v4.16b, v1.16b, v1.16b, #8
909- ; CHECK-SD-NEXT: ext v5.16b, v0.16b, v0.16b, #8
910- ; CHECK-SD-NEXT: fmul v1.2s, v1.2s, v4.2s
911- ; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v5.2s
912- ; CHECK-SD-NEXT: fmul s1, s1, v1.s[1]
906+ ; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v1.4s
907+ ; CHECK-SD-NEXT: fmul s2, s2, s3
908+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
909+ ; CHECK-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
913910; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
914- ; CHECK-SD-NEXT: fmul s1, s1, s3
915911; CHECK-SD-NEXT: fmul s0, s0, s2
916- ; CHECK-SD-NEXT: fmul s0, s0, s1
917912; CHECK-SD-NEXT: ret
918913;
919914; CHECK-GI-LABEL: nested_mul_f32:
@@ -941,12 +936,10 @@ define float @nested_mul_f32(<4 x float> %a, <4 x float> %b, float %c, float %d)
941936define i32 @nested_add_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
942937; CHECK-SD-LABEL: nested_add_i32:
943938; CHECK-SD: // %bb.0:
944- ; CHECK-SD-NEXT: addv s1, v1.4s
939+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
940+ ; CHECK-SD-NEXT: add w8, w0, w1
945941; CHECK-SD-NEXT: addv s0, v0.4s
946- ; CHECK-SD-NEXT: fmov w8, s1
947942; CHECK-SD-NEXT: fmov w9, s0
948- ; CHECK-SD-NEXT: add w9, w9, w0
949- ; CHECK-SD-NEXT: add w8, w8, w1
950943; CHECK-SD-NEXT: add w0, w9, w8
951944; CHECK-SD-NEXT: ret
952945;
@@ -971,12 +964,10 @@ define i32 @nested_add_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
971964define i32 @nested_add_c1_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
972965; CHECK-SD-LABEL: nested_add_c1_i32:
973966; CHECK-SD: // %bb.0:
974- ; CHECK-SD-NEXT: addv s1, v1.4s
967+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
968+ ; CHECK-SD-NEXT: add w8, w0, w1
975969; CHECK-SD-NEXT: addv s0, v0.4s
976- ; CHECK-SD-NEXT: fmov w8, s1
977970; CHECK-SD-NEXT: fmov w9, s0
978- ; CHECK-SD-NEXT: add w9, w0, w9
979- ; CHECK-SD-NEXT: add w8, w8, w1
980971; CHECK-SD-NEXT: add w0, w9, w8
981972; CHECK-SD-NEXT: ret
982973;
@@ -1001,12 +992,10 @@ define i32 @nested_add_c1_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
1001992define i32 @nested_add_c2_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
1002993; CHECK-SD-LABEL: nested_add_c2_i32:
1003994; CHECK-SD: // %bb.0:
1004- ; CHECK-SD-NEXT: addv s1, v1.4s
995+ ; CHECK-SD-NEXT: add v0.4s, v0.4s, v1.4s
996+ ; CHECK-SD-NEXT: add w8, w0, w1
1005997; CHECK-SD-NEXT: addv s0, v0.4s
1006- ; CHECK-SD-NEXT: fmov w8, s1
1007998; CHECK-SD-NEXT: fmov w9, s0
1008- ; CHECK-SD-NEXT: add w9, w9, w0
1009- ; CHECK-SD-NEXT: add w8, w1, w8
1010999; CHECK-SD-NEXT: add w0, w9, w8
10111000; CHECK-SD-NEXT: ret
10121001;
@@ -1065,19 +1054,14 @@ define i32 @nested_add_manyreduct_i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
10651054define i32 @nested_mul_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
10661055; CHECK-SD-LABEL: nested_mul_i32:
10671056; CHECK-SD: // %bb.0:
1068- ; CHECK-SD-NEXT: ext v3.16b, v0.16b , v0.16b, #8
1069- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1070- ; CHECK-SD-NEXT: mul v0.2s , v0.2s, v3.2s
1071- ; CHECK-SD-NEXT: mul v1 .2s, v1 .2s, v2 .2s
1072- ; CHECK-SD-NEXT: mov w8 , v0.s[1]
1057+ ; CHECK-SD-NEXT: mul v0.4s , v0.4s, v1.4s
1058+ ; CHECK-SD-NEXT: mul w8, w0, w1
1059+ ; CHECK-SD-NEXT: ext v1.16b , v0.16b, v0.16b, #8
1060+ ; CHECK-SD-NEXT: mul v0 .2s, v0 .2s, v1 .2s
1061+ ; CHECK-SD-NEXT: mov w9 , v0.s[1]
10731062; CHECK-SD-NEXT: fmov w10, s0
1074- ; CHECK-SD-NEXT: mov w9, v1.s[1]
1075- ; CHECK-SD-NEXT: mul w8, w10, w8
1076- ; CHECK-SD-NEXT: fmov w10, s1
10771063; CHECK-SD-NEXT: mul w9, w10, w9
1078- ; CHECK-SD-NEXT: mul w8, w8, w0
1079- ; CHECK-SD-NEXT: mul w9, w9, w1
1080- ; CHECK-SD-NEXT: mul w0, w8, w9
1064+ ; CHECK-SD-NEXT: mul w0, w9, w8
10811065; CHECK-SD-NEXT: ret
10821066;
10831067; CHECK-GI-LABEL: nested_mul_i32:
@@ -1107,19 +1091,14 @@ define i32 @nested_mul_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11071091define i32 @nested_and_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
11081092; CHECK-SD-LABEL: nested_and_i32:
11091093; CHECK-SD: // %bb.0:
1110- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1111- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1112- ; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
1113- ; CHECK-SD-NEXT: and v0.8b, v0.8b, v3.8b
1114- ; CHECK-SD-NEXT: fmov x8, d1
1094+ ; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
1095+ ; CHECK-SD-NEXT: and w8, w0, w1
1096+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1097+ ; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
11151098; CHECK-SD-NEXT: fmov x9, d0
11161099; CHECK-SD-NEXT: lsr x10, x9, #32
1117- ; CHECK-SD-NEXT: lsr x11, x8, #32
1118- ; CHECK-SD-NEXT: and w9, w9, w0
1119- ; CHECK-SD-NEXT: and w8, w8, w1
1120- ; CHECK-SD-NEXT: and w9, w9, w10
1121- ; CHECK-SD-NEXT: and w8, w8, w11
1122- ; CHECK-SD-NEXT: and w0, w9, w8
1100+ ; CHECK-SD-NEXT: and w8, w9, w8
1101+ ; CHECK-SD-NEXT: and w0, w8, w10
11231102; CHECK-SD-NEXT: ret
11241103;
11251104; CHECK-GI-LABEL: nested_and_i32:
@@ -1149,19 +1128,14 @@ define i32 @nested_and_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11491128define i32 @nested_or_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
11501129; CHECK-SD-LABEL: nested_or_i32:
11511130; CHECK-SD: // %bb.0:
1152- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1153- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1154- ; CHECK-SD-NEXT: orr v1.8b, v1.8b, v2.8b
1155- ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v3.8b
1156- ; CHECK-SD-NEXT: fmov x8, d1
1131+ ; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b
1132+ ; CHECK-SD-NEXT: orr w8, w0, w1
1133+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1134+ ; CHECK-SD-NEXT: orr v0.8b, v0.8b, v1.8b
11571135; CHECK-SD-NEXT: fmov x9, d0
11581136; CHECK-SD-NEXT: lsr x10, x9, #32
1159- ; CHECK-SD-NEXT: lsr x11, x8, #32
1160- ; CHECK-SD-NEXT: orr w9, w9, w0
1161- ; CHECK-SD-NEXT: orr w8, w8, w1
1162- ; CHECK-SD-NEXT: orr w9, w9, w10
1163- ; CHECK-SD-NEXT: orr w8, w8, w11
1164- ; CHECK-SD-NEXT: orr w0, w9, w8
1137+ ; CHECK-SD-NEXT: orr w8, w9, w8
1138+ ; CHECK-SD-NEXT: orr w0, w8, w10
11651139; CHECK-SD-NEXT: ret
11661140;
11671141; CHECK-GI-LABEL: nested_or_i32:
@@ -1191,19 +1165,14 @@ define i32 @nested_or_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
11911165define i32 @nested_xor_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
11921166; CHECK-SD-LABEL: nested_xor_i32:
11931167; CHECK-SD: // %bb.0:
1194- ; CHECK-SD-NEXT: ext v2.16b, v1.16b, v1.16b, #8
1195- ; CHECK-SD-NEXT: ext v3.16b, v0.16b, v0.16b, #8
1196- ; CHECK-SD-NEXT: eor v1.8b, v1.8b, v2.8b
1197- ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v3.8b
1198- ; CHECK-SD-NEXT: fmov x8, d1
1168+ ; CHECK-SD-NEXT: eor v0.16b, v0.16b, v1.16b
1169+ ; CHECK-SD-NEXT: eor w8, w0, w1
1170+ ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
1171+ ; CHECK-SD-NEXT: eor v0.8b, v0.8b, v1.8b
11991172; CHECK-SD-NEXT: fmov x9, d0
12001173; CHECK-SD-NEXT: lsr x10, x9, #32
1201- ; CHECK-SD-NEXT: lsr x11, x8, #32
1202- ; CHECK-SD-NEXT: eor w9, w9, w0
1203- ; CHECK-SD-NEXT: eor w8, w8, w1
1204- ; CHECK-SD-NEXT: eor w9, w9, w10
1205- ; CHECK-SD-NEXT: eor w8, w8, w11
1206- ; CHECK-SD-NEXT: eor w0, w9, w8
1174+ ; CHECK-SD-NEXT: eor w8, w9, w8
1175+ ; CHECK-SD-NEXT: eor w0, w8, w10
12071176; CHECK-SD-NEXT: ret
12081177;
12091178; CHECK-GI-LABEL: nested_xor_i32:
@@ -1233,14 +1202,11 @@ define i32 @nested_xor_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12331202define i32 @nested_smin_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
12341203; CHECK-SD-LABEL: nested_smin_i32:
12351204; CHECK-SD: // %bb.0:
1205+ ; CHECK-SD-NEXT: smin v0.4s, v0.4s, v1.4s
1206+ ; CHECK-SD-NEXT: cmp w0, w1
1207+ ; CHECK-SD-NEXT: csel w8, w0, w1, lt
12361208; CHECK-SD-NEXT: sminv s0, v0.4s
1237- ; CHECK-SD-NEXT: sminv s1, v1.4s
12381209; CHECK-SD-NEXT: fmov w9, s0
1239- ; CHECK-SD-NEXT: fmov w8, s1
1240- ; CHECK-SD-NEXT: cmp w9, w0
1241- ; CHECK-SD-NEXT: csel w9, w9, w0, lt
1242- ; CHECK-SD-NEXT: cmp w8, w1
1243- ; CHECK-SD-NEXT: csel w8, w8, w1, lt
12441210; CHECK-SD-NEXT: cmp w9, w8
12451211; CHECK-SD-NEXT: csel w0, w9, w8, lt
12461212; CHECK-SD-NEXT: ret
@@ -1269,14 +1235,11 @@ define i32 @nested_smin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
12691235define i32 @nested_smax_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
12701236; CHECK-SD-LABEL: nested_smax_i32:
12711237; CHECK-SD: // %bb.0:
1238+ ; CHECK-SD-NEXT: smax v0.4s, v0.4s, v1.4s
1239+ ; CHECK-SD-NEXT: cmp w0, w1
1240+ ; CHECK-SD-NEXT: csel w8, w0, w1, gt
12721241; CHECK-SD-NEXT: smaxv s0, v0.4s
1273- ; CHECK-SD-NEXT: smaxv s1, v1.4s
12741242; CHECK-SD-NEXT: fmov w9, s0
1275- ; CHECK-SD-NEXT: fmov w8, s1
1276- ; CHECK-SD-NEXT: cmp w9, w0
1277- ; CHECK-SD-NEXT: csel w9, w9, w0, gt
1278- ; CHECK-SD-NEXT: cmp w8, w1
1279- ; CHECK-SD-NEXT: csel w8, w8, w1, gt
12801243; CHECK-SD-NEXT: cmp w9, w8
12811244; CHECK-SD-NEXT: csel w0, w9, w8, gt
12821245; CHECK-SD-NEXT: ret
@@ -1305,14 +1268,11 @@ define i32 @nested_smax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13051268define i32 @nested_umin_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
13061269; CHECK-SD-LABEL: nested_umin_i32:
13071270; CHECK-SD: // %bb.0:
1271+ ; CHECK-SD-NEXT: umin v0.4s, v0.4s, v1.4s
1272+ ; CHECK-SD-NEXT: cmp w0, w1
1273+ ; CHECK-SD-NEXT: csel w8, w0, w1, lo
13081274; CHECK-SD-NEXT: uminv s0, v0.4s
1309- ; CHECK-SD-NEXT: uminv s1, v1.4s
13101275; CHECK-SD-NEXT: fmov w9, s0
1311- ; CHECK-SD-NEXT: fmov w8, s1
1312- ; CHECK-SD-NEXT: cmp w9, w0
1313- ; CHECK-SD-NEXT: csel w9, w9, w0, lo
1314- ; CHECK-SD-NEXT: cmp w8, w1
1315- ; CHECK-SD-NEXT: csel w8, w8, w1, lo
13161276; CHECK-SD-NEXT: cmp w9, w8
13171277; CHECK-SD-NEXT: csel w0, w9, w8, lo
13181278; CHECK-SD-NEXT: ret
@@ -1341,14 +1301,11 @@ define i32 @nested_umin_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13411301define i32 @nested_umax_i32 (<4 x i32 > %a , <4 x i32 > %b , i32 %c , i32 %d ) {
13421302; CHECK-SD-LABEL: nested_umax_i32:
13431303; CHECK-SD: // %bb.0:
1304+ ; CHECK-SD-NEXT: umax v0.4s, v0.4s, v1.4s
1305+ ; CHECK-SD-NEXT: cmp w0, w1
1306+ ; CHECK-SD-NEXT: csel w8, w0, w1, hi
13441307; CHECK-SD-NEXT: umaxv s0, v0.4s
1345- ; CHECK-SD-NEXT: umaxv s1, v1.4s
13461308; CHECK-SD-NEXT: fmov w9, s0
1347- ; CHECK-SD-NEXT: fmov w8, s1
1348- ; CHECK-SD-NEXT: cmp w9, w0
1349- ; CHECK-SD-NEXT: csel w9, w9, w0, hi
1350- ; CHECK-SD-NEXT: cmp w8, w1
1351- ; CHECK-SD-NEXT: csel w8, w8, w1, hi
13521309; CHECK-SD-NEXT: cmp w9, w8
13531310; CHECK-SD-NEXT: csel w0, w9, w8, hi
13541311; CHECK-SD-NEXT: ret
@@ -1377,11 +1334,10 @@ define i32 @nested_umax_i32(<4 x i32> %a, <4 x i32> %b, i32 %c, i32 %d) {
13771334define float @nested_fmin_float (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
13781335; CHECK-SD-LABEL: nested_fmin_float:
13791336; CHECK-SD: // %bb.0:
1380- ; CHECK-SD-NEXT: fminnmv s1, v1.4s
1337+ ; CHECK-SD-NEXT: fminnm v0.4s, v0.4s, v1.4s
1338+ ; CHECK-SD-NEXT: fminnm s2, s2, s3
13811339; CHECK-SD-NEXT: fminnmv s0, v0.4s
1382- ; CHECK-SD-NEXT: fminnm s1, s1, s3
13831340; CHECK-SD-NEXT: fminnm s0, s0, s2
1384- ; CHECK-SD-NEXT: fminnm s0, s0, s1
13851341; CHECK-SD-NEXT: ret
13861342;
13871343; CHECK-GI-LABEL: nested_fmin_float:
@@ -1403,11 +1359,10 @@ define float @nested_fmin_float(<4 x float> %a, <4 x float> %b, float %c, float
14031359define float @nested_fmax_float (<4 x float > %a , <4 x float > %b , float %c , float %d ) {
14041360; CHECK-SD-LABEL: nested_fmax_float:
14051361; CHECK-SD: // %bb.0:
1406- ; CHECK-SD-NEXT: fmaxnmv s1, v1.4s
1362+ ; CHECK-SD-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
1363+ ; CHECK-SD-NEXT: fmaxnm s2, s2, s3
14071364; CHECK-SD-NEXT: fmaxnmv s0, v0.4s
1408- ; CHECK-SD-NEXT: fmaxnm s1, s1, s3
14091365; CHECK-SD-NEXT: fmaxnm s0, s0, s2
1410- ; CHECK-SD-NEXT: fmaxnm s0, s0, s1
14111366; CHECK-SD-NEXT: ret
14121367;
14131368; CHECK-GI-LABEL: nested_fmax_float:
0 commit comments