Skip to content

Commit cc2f403

Browse files
committed
SelectionDAG/expandFMINNUM_FMAXNUM: skip vectors if SETCC/VSELECT is not legal
If SETCC or VSELECT is not legal for a vector type, we should not expand the node; instead, we can split the vector. That way, simple scalar instructions can be emitted instead of pairs of comparison + selection.
1 parent 89d2a9d commit cc2f403

File tree

3 files changed

+132
-264
lines changed

3 files changed

+132
-264
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8424,6 +8424,11 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
84248424

84258425
if (Node->getFlags().hasNoNaNs()) {
84268426
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8427+
EVT VT = Node->getValueType(0);
8428+
if ((!isCondCodeLegal(Pred, VT.getSimpleVT()) ||
8429+
!isOperationLegalOrCustom(ISD::VSELECT, VT)) &&
8430+
VT.isVector())
8431+
return SDValue();
84278432
SDValue Op1 = Node->getOperand(0);
84288433
SDValue Op2 = Node->getOperand(1);
84298434
SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);

llvm/test/CodeGen/ARM/minnum-maxnum-intrinsics.ll

Lines changed: 59 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -609,26 +609,21 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) {
609609
define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) {
610610
; ARMV7-LABEL: fminnumv432_one_zero_intrinsic:
611611
; ARMV7: @ %bb.0:
612-
; ARMV7-NEXT: vmov d3, r2, r3
613-
; ARMV7-NEXT: vmov d2, r0, r1
614-
; ARMV7-NEXT: vmov.f32 s0, #-1.000000e+00
615-
; ARMV7-NEXT: vcmp.f32 s5, #0
616-
; ARMV7-NEXT: vldr s1, .LCPI18_0
612+
; ARMV7-NEXT: vmov d1, r2, r3
613+
; ARMV7-NEXT: vldr s8, .LCPI18_0
614+
; ARMV7-NEXT: vmov d0, r0, r1
615+
; ARMV7-NEXT: vmov.f32 s10, #-1.000000e+00
616+
; ARMV7-NEXT: vcmp.f32 s1, #0
617617
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
618-
; ARMV7-NEXT: vcmp.f32 s7, s0
619-
; ARMV7-NEXT: vmovlt.f32 s1, s5
620-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
621-
; ARMV7-NEXT: vmov.f32 s3, s0
622-
; ARMV7-NEXT: vcmp.f32 s6, s0
623-
; ARMV7-NEXT: vmovlt.f32 s3, s7
624-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
625-
; ARMV7-NEXT: vmov.f32 s2, s0
626-
; ARMV7-NEXT: vcmp.f32 s4, s0
627-
; ARMV7-NEXT: vmovlt.f32 s2, s6
628-
; ARMV7-NEXT: vmrs APSR_nzcv, fpscr
629-
; ARMV7-NEXT: vmovlt.f32 s0, s4
630-
; ARMV7-NEXT: vmov r2, r3, d1
631-
; ARMV7-NEXT: vmov r0, r1, d0
618+
; ARMV7-NEXT: vmov.f32 s4, s3
619+
; ARMV7-NEXT: vmin.f32 d6, d2, d5
620+
; ARMV7-NEXT: vmin.f32 d3, d1, d5
621+
; ARMV7-NEXT: vmin.f32 d2, d0, d5
622+
; ARMV7-NEXT: vmovlt.f32 s8, s1
623+
; ARMV7-NEXT: vmov.f32 s5, s8
624+
; ARMV7-NEXT: vmov.f32 s7, s12
625+
; ARMV7-NEXT: vmov r0, r1, d2
626+
; ARMV7-NEXT: vmov r2, r3, d3
632627
; ARMV7-NEXT: bx lr
633628
; ARMV7-NEXT: .p2align 2
634629
; ARMV7-NEXT: @ %bb.1:
@@ -918,15 +913,11 @@ define <2 x double> @fminnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
918913
; ARMV8: @ %bb.0:
919914
; ARMV8-NEXT: mov r12, sp
920915
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
921-
; ARMV8-NEXT: vmov d18, r0, r1
922-
; ARMV8-NEXT: vmov d19, r2, r3
923-
; ARMV8-NEXT: vcmp.f64 d16, d18
924-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
925-
; ARMV8-NEXT: vcmp.f64 d17, d19
926-
; ARMV8-NEXT: vselgt.f64 d18, d18, d16
927-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
928-
; ARMV8-NEXT: vmov r0, r1, d18
929-
; ARMV8-NEXT: vselgt.f64 d16, d19, d17
916+
; ARMV8-NEXT: vmov d19, r0, r1
917+
; ARMV8-NEXT: vmov d18, r2, r3
918+
; ARMV8-NEXT: vminnm.f64 d19, d19, d16
919+
; ARMV8-NEXT: vminnm.f64 d16, d18, d17
920+
; ARMV8-NEXT: vmov r0, r1, d19
930921
; ARMV8-NEXT: vmov r2, r3, d16
931922
; ARMV8-NEXT: bx lr
932923
;
@@ -970,15 +961,11 @@ define <2 x double> @fminnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
970961
; ARMV8: @ %bb.0:
971962
; ARMV8-NEXT: mov r12, sp
972963
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
973-
; ARMV8-NEXT: vmov d18, r0, r1
974-
; ARMV8-NEXT: vmov d19, r2, r3
975-
; ARMV8-NEXT: vcmp.f64 d16, d18
976-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
977-
; ARMV8-NEXT: vcmp.f64 d17, d19
978-
; ARMV8-NEXT: vselgt.f64 d18, d18, d16
979-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
980-
; ARMV8-NEXT: vmov r0, r1, d18
981-
; ARMV8-NEXT: vselgt.f64 d16, d19, d17
964+
; ARMV8-NEXT: vmov d19, r0, r1
965+
; ARMV8-NEXT: vmov d18, r2, r3
966+
; ARMV8-NEXT: vminnm.f64 d19, d19, d16
967+
; ARMV8-NEXT: vminnm.f64 d16, d18, d17
968+
; ARMV8-NEXT: vmov r0, r1, d19
982969
; ARMV8-NEXT: vmov r2, r3, d16
983970
; ARMV8-NEXT: bx lr
984971
;
@@ -1020,16 +1007,12 @@ define <2 x double> @fminnumv264_non_zero_intrinsic(<2 x double> %x) {
10201007
;
10211008
; ARMV8-LABEL: fminnumv264_non_zero_intrinsic:
10221009
; ARMV8: @ %bb.0:
1023-
; ARMV8-NEXT: vmov d17, r0, r1
10241010
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
1025-
; ARMV8-NEXT: vcmp.f64 d16, d17
1026-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1027-
; ARMV8-NEXT: vmov d18, r2, r3
1028-
; ARMV8-NEXT: vcmp.f64 d16, d18
1029-
; ARMV8-NEXT: vselgt.f64 d17, d17, d16
1030-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1031-
; ARMV8-NEXT: vmov r0, r1, d17
1032-
; ARMV8-NEXT: vselgt.f64 d16, d18, d16
1011+
; ARMV8-NEXT: vmov d18, r0, r1
1012+
; ARMV8-NEXT: vmov d17, r2, r3
1013+
; ARMV8-NEXT: vminnm.f64 d18, d18, d16
1014+
; ARMV8-NEXT: vminnm.f64 d16, d17, d16
1015+
; ARMV8-NEXT: vmov r0, r1, d18
10331016
; ARMV8-NEXT: vmov r2, r3, d16
10341017
; ARMV8-NEXT: bx lr
10351018
;
@@ -1070,18 +1053,14 @@ define <2 x double> @fminnumv264_one_zero_intrinsic(<2 x double> %x) {
10701053
;
10711054
; ARMV8-LABEL: fminnumv264_one_zero_intrinsic:
10721055
; ARMV8: @ %bb.0:
1073-
; ARMV8-NEXT: vmov d19, r2, r3
1074-
; ARMV8-NEXT: vcmp.f64 d19, #0
1075-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1076-
; ARMV8-NEXT: vmov d18, r0, r1
10771056
; ARMV8-NEXT: vmov.f64 d16, #-1.000000e+00
1078-
; ARMV8-NEXT: vcmp.f64 d16, d18
1057+
; ARMV8-NEXT: vmov d18, r0, r1
10791058
; ARMV8-NEXT: vmov.i32 d17, #0x0
1080-
; ARMV8-NEXT: vmovlt.f64 d17, d19
1081-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1082-
; ARMV8-NEXT: vmov r2, r3, d17
1083-
; ARMV8-NEXT: vselgt.f64 d16, d18, d16
1059+
; ARMV8-NEXT: vminnm.f64 d16, d18, d16
1060+
; ARMV8-NEXT: vmov d19, r2, r3
1061+
; ARMV8-NEXT: vminnm.f64 d17, d19, d17
10841062
; ARMV8-NEXT: vmov r0, r1, d16
1063+
; ARMV8-NEXT: vmov r2, r3, d17
10851064
; ARMV8-NEXT: bx lr
10861065
;
10871066
; ARMV8M-LABEL: fminnumv264_one_zero_intrinsic:
@@ -1129,15 +1108,11 @@ define <2 x double> @fmaxnumv264_intrinsic(<2 x double> %x, <2 x double> %y) {
11291108
; ARMV8: @ %bb.0:
11301109
; ARMV8-NEXT: mov r12, sp
11311110
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
1132-
; ARMV8-NEXT: vmov d18, r0, r1
1133-
; ARMV8-NEXT: vcmp.f64 d18, d16
1134-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1135-
; ARMV8-NEXT: vmov d19, r2, r3
1136-
; ARMV8-NEXT: vcmp.f64 d19, d17
1137-
; ARMV8-NEXT: vselgt.f64 d18, d18, d16
1138-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1139-
; ARMV8-NEXT: vmov r0, r1, d18
1140-
; ARMV8-NEXT: vselgt.f64 d16, d19, d17
1111+
; ARMV8-NEXT: vmov d19, r0, r1
1112+
; ARMV8-NEXT: vmov d18, r2, r3
1113+
; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
1114+
; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
1115+
; ARMV8-NEXT: vmov r0, r1, d19
11411116
; ARMV8-NEXT: vmov r2, r3, d16
11421117
; ARMV8-NEXT: bx lr
11431118
;
@@ -1181,15 +1156,11 @@ define <2 x double> @fmaxnumv264_nsz_intrinsic(<2 x double> %x, <2 x double> %y)
11811156
; ARMV8: @ %bb.0:
11821157
; ARMV8-NEXT: mov r12, sp
11831158
; ARMV8-NEXT: vld1.64 {d16, d17}, [r12]
1184-
; ARMV8-NEXT: vmov d18, r0, r1
1185-
; ARMV8-NEXT: vcmp.f64 d18, d16
1186-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1187-
; ARMV8-NEXT: vmov d19, r2, r3
1188-
; ARMV8-NEXT: vcmp.f64 d19, d17
1189-
; ARMV8-NEXT: vselgt.f64 d18, d18, d16
1190-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1191-
; ARMV8-NEXT: vmov r0, r1, d18
1192-
; ARMV8-NEXT: vselgt.f64 d16, d19, d17
1159+
; ARMV8-NEXT: vmov d19, r0, r1
1160+
; ARMV8-NEXT: vmov d18, r2, r3
1161+
; ARMV8-NEXT: vmaxnm.f64 d19, d19, d16
1162+
; ARMV8-NEXT: vmaxnm.f64 d16, d18, d17
1163+
; ARMV8-NEXT: vmov r0, r1, d19
11931164
; ARMV8-NEXT: vmov r2, r3, d16
11941165
; ARMV8-NEXT: bx lr
11951166
;
@@ -1236,18 +1207,14 @@ define <2 x double> @fmaxnumv264_zero_intrinsic(<2 x double> %x) {
12361207
;
12371208
; ARMV8-LABEL: fmaxnumv264_zero_intrinsic:
12381209
; ARMV8: @ %bb.0:
1239-
; ARMV8-NEXT: vmov d18, r0, r1
12401210
; ARMV8-NEXT: vldr d16, .LCPI30_0
1241-
; ARMV8-NEXT: vcmp.f64 d18, #0
1242-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1243-
; ARMV8-NEXT: vmov d19, r2, r3
1244-
; ARMV8-NEXT: vcmp.f64 d19, d16
1211+
; ARMV8-NEXT: vmov d18, r2, r3
12451212
; ARMV8-NEXT: vmov.i32 d17, #0x0
1246-
; ARMV8-NEXT: vselgt.f64 d17, d18, d17
1247-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1248-
; ARMV8-NEXT: vmov r0, r1, d17
1249-
; ARMV8-NEXT: vselgt.f64 d16, d19, d16
1213+
; ARMV8-NEXT: vmov d19, r0, r1
1214+
; ARMV8-NEXT: vmaxnm.f64 d16, d18, d16
1215+
; ARMV8-NEXT: vmaxnm.f64 d17, d19, d17
12501216
; ARMV8-NEXT: vmov r2, r3, d16
1217+
; ARMV8-NEXT: vmov r0, r1, d17
12511218
; ARMV8-NEXT: bx lr
12521219
; ARMV8-NEXT: .p2align 3
12531220
; ARMV8-NEXT: @ %bb.1:
@@ -1307,15 +1274,11 @@ define <2 x double> @fmaxnumv264_minus_zero_intrinsic(<2 x double> %x) {
13071274
; ARMV8-LABEL: fmaxnumv264_minus_zero_intrinsic:
13081275
; ARMV8: @ %bb.0:
13091276
; ARMV8-NEXT: vldr d16, .LCPI31_0
1310-
; ARMV8-NEXT: vmov d17, r0, r1
1311-
; ARMV8-NEXT: vmov d18, r2, r3
1312-
; ARMV8-NEXT: vcmp.f64 d17, d16
1313-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1314-
; ARMV8-NEXT: vcmp.f64 d18, d16
1315-
; ARMV8-NEXT: vselgt.f64 d17, d17, d16
1316-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1317-
; ARMV8-NEXT: vmov r0, r1, d17
1318-
; ARMV8-NEXT: vselgt.f64 d16, d18, d16
1277+
; ARMV8-NEXT: vmov d18, r0, r1
1278+
; ARMV8-NEXT: vmov d17, r2, r3
1279+
; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
1280+
; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
1281+
; ARMV8-NEXT: vmov r0, r1, d18
13191282
; ARMV8-NEXT: vmov r2, r3, d16
13201283
; ARMV8-NEXT: bx lr
13211284
; ARMV8-NEXT: .p2align 3
@@ -1367,15 +1330,11 @@ define <2 x double> @fmaxnumv264_non_zero_intrinsic(<2 x double> %x) {
13671330
; ARMV8-LABEL: fmaxnumv264_non_zero_intrinsic:
13681331
; ARMV8: @ %bb.0:
13691332
; ARMV8-NEXT: vmov.f64 d16, #1.000000e+00
1370-
; ARMV8-NEXT: vmov d17, r0, r1
1371-
; ARMV8-NEXT: vcmp.f64 d17, d16
1372-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1373-
; ARMV8-NEXT: vmov d18, r2, r3
1374-
; ARMV8-NEXT: vcmp.f64 d18, d16
1375-
; ARMV8-NEXT: vselgt.f64 d17, d17, d16
1376-
; ARMV8-NEXT: vmrs APSR_nzcv, fpscr
1377-
; ARMV8-NEXT: vmov r0, r1, d17
1378-
; ARMV8-NEXT: vselgt.f64 d16, d18, d16
1333+
; ARMV8-NEXT: vmov d18, r0, r1
1334+
; ARMV8-NEXT: vmov d17, r2, r3
1335+
; ARMV8-NEXT: vmaxnm.f64 d18, d18, d16
1336+
; ARMV8-NEXT: vmaxnm.f64 d16, d17, d16
1337+
; ARMV8-NEXT: vmov r0, r1, d18
13791338
; ARMV8-NEXT: vmov r2, r3, d16
13801339
; ARMV8-NEXT: bx lr
13811340
;

0 commit comments

Comments
 (0)