Skip to content

Commit 36fa87c

Browse files
[LLVM][SelectionDAG] Simplify SplitVecOp_VSETCC.
Preserving the original result element type when splitting vector setcc operations removes redundant extensions that are awkward to optimise after the fact. Whilst the result type might not be legal, the current decision to force an i1 vector result is almost certainly not going to be legal either. Given that by this point we've already gone through the process of identifying a better element result type, I figure we may as well use it.
1 parent d90cac9 commit 36fa87c

File tree

4 files changed

+56
-145
lines changed

4 files changed

+56
-145
lines changed

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4343,9 +4343,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
43434343

43444344
auto PartEltCnt = Lo0.getValueType().getVectorElementCount();
43454345

4346-
LLVMContext &Context = *DAG.getContext();
4347-
EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt);
4348-
EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2);
4346+
EVT VT = N->getValueType(0);
4347+
EVT PartResVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
4348+
assert(PartResVT.getVectorElementCount() == PartEltCnt &&
4349+
"Expected an equal split!");
43494350

43504351
if (Opc == ISD::SETCC) {
43514352
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
@@ -4369,12 +4370,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
43694370
HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
43704371
N->getOperand(2), MaskHi, EVLHi);
43714372
}
4372-
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
43734373

4374-
EVT OpVT = N->getOperand(0).getValueType();
4375-
ISD::NodeType ExtendCode =
4376-
TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
4377-
return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
4374+
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LoRes, HiRes);
43784375
}
43794376

43804377

llvm/test/CodeGen/AArch64/bf16-v8-instructions.ll

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -890,8 +890,6 @@ define <8 x i1> @test_fcmp_une(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
890890
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
891891
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
892892
; CHECK-NEXT: mvn v0.16b, v0.16b
893-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
894-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
895893
; CHECK-NEXT: xtn v0.8b, v0.8h
896894
; CHECK-NEXT: ret
897895
%1 = fcmp une <8 x bfloat> %a, %b
@@ -913,8 +911,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
913911
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
914912
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
915913
; CHECK-NEXT: mvn v0.16b, v0.16b
916-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
917-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
918914
; CHECK-NEXT: xtn v0.8b, v0.8h
919915
; CHECK-NEXT: ret
920916
%1 = fcmp ueq <8 x bfloat> %a, %b
@@ -932,8 +928,6 @@ define <8 x i1> @test_fcmp_ugt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
932928
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
933929
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
934930
; CHECK-NEXT: mvn v0.16b, v0.16b
935-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
936-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
937931
; CHECK-NEXT: xtn v0.8b, v0.8h
938932
; CHECK-NEXT: ret
939933
%1 = fcmp ugt <8 x bfloat> %a, %b
@@ -951,8 +945,6 @@ define <8 x i1> @test_fcmp_uge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
951945
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
952946
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
953947
; CHECK-NEXT: mvn v0.16b, v0.16b
954-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
955-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
956948
; CHECK-NEXT: xtn v0.8b, v0.8h
957949
; CHECK-NEXT: ret
958950
%1 = fcmp uge <8 x bfloat> %a, %b
@@ -970,8 +962,6 @@ define <8 x i1> @test_fcmp_ult(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
970962
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
971963
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
972964
; CHECK-NEXT: mvn v0.16b, v0.16b
973-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
974-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
975965
; CHECK-NEXT: xtn v0.8b, v0.8h
976966
; CHECK-NEXT: ret
977967
%1 = fcmp ult <8 x bfloat> %a, %b
@@ -989,8 +979,6 @@ define <8 x i1> @test_fcmp_ule(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
989979
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
990980
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
991981
; CHECK-NEXT: mvn v0.16b, v0.16b
992-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
993-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
994982
; CHECK-NEXT: xtn v0.8b, v0.8h
995983
; CHECK-NEXT: ret
996984
%1 = fcmp ule <8 x bfloat> %a, %b
@@ -1012,8 +1000,6 @@ define <8 x i1> @test_fcmp_uno(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10121000
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
10131001
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
10141002
; CHECK-NEXT: mvn v0.16b, v0.16b
1015-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1016-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10171003
; CHECK-NEXT: xtn v0.8b, v0.8h
10181004
; CHECK-NEXT: ret
10191005
%1 = fcmp uno <8 x bfloat> %a, %b
@@ -1034,8 +1020,6 @@ define <8 x i1> @test_fcmp_one(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10341020
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
10351021
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
10361022
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
1037-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1038-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10391023
; CHECK-NEXT: xtn v0.8b, v0.8h
10401024
; CHECK-NEXT: ret
10411025
%1 = fcmp one <8 x bfloat> %a, %b
@@ -1052,8 +1036,6 @@ define <8 x i1> @test_fcmp_oeq(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10521036
; CHECK-NEXT: fcmeq v2.4s, v3.4s, v2.4s
10531037
; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
10541038
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1055-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1056-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10571039
; CHECK-NEXT: xtn v0.8b, v0.8h
10581040
; CHECK-NEXT: ret
10591041
%1 = fcmp oeq <8 x bfloat> %a, %b
@@ -1070,8 +1052,6 @@ define <8 x i1> @test_fcmp_ogt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10701052
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
10711053
; CHECK-NEXT: fcmgt v0.4s, v0.4s, v1.4s
10721054
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1073-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1074-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10751055
; CHECK-NEXT: xtn v0.8b, v0.8h
10761056
; CHECK-NEXT: ret
10771057
%1 = fcmp ogt <8 x bfloat> %a, %b
@@ -1088,8 +1068,6 @@ define <8 x i1> @test_fcmp_oge(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
10881068
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
10891069
; CHECK-NEXT: fcmge v0.4s, v0.4s, v1.4s
10901070
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1091-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1092-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
10931071
; CHECK-NEXT: xtn v0.8b, v0.8h
10941072
; CHECK-NEXT: ret
10951073
%1 = fcmp oge <8 x bfloat> %a, %b
@@ -1106,8 +1084,6 @@ define <8 x i1> @test_fcmp_olt(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11061084
; CHECK-NEXT: fcmgt v2.4s, v3.4s, v2.4s
11071085
; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s
11081086
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1109-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1110-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11111087
; CHECK-NEXT: xtn v0.8b, v0.8h
11121088
; CHECK-NEXT: ret
11131089
%1 = fcmp olt <8 x bfloat> %a, %b
@@ -1124,8 +1100,6 @@ define <8 x i1> @test_fcmp_ole(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11241100
; CHECK-NEXT: fcmge v2.4s, v3.4s, v2.4s
11251101
; CHECK-NEXT: fcmge v0.4s, v1.4s, v0.4s
11261102
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1127-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1128-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11291103
; CHECK-NEXT: xtn v0.8b, v0.8h
11301104
; CHECK-NEXT: ret
11311105
%1 = fcmp ole <8 x bfloat> %a, %b
@@ -1146,8 +1120,6 @@ define <8 x i1> @test_fcmp_ord(<8 x bfloat> %a, <8 x bfloat> %b) #0 {
11461120
; CHECK-NEXT: orr v1.16b, v2.16b, v4.16b
11471121
; CHECK-NEXT: orr v0.16b, v0.16b, v3.16b
11481122
; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
1149-
; CHECK-NEXT: shl v0.8h, v0.8h, #15
1150-
; CHECK-NEXT: cmlt v0.8h, v0.8h, #0
11511123
; CHECK-NEXT: xtn v0.8b, v0.8h
11521124
; CHECK-NEXT: ret
11531125
%1 = fcmp ord <8 x bfloat> %a, %b

llvm/test/CodeGen/AArch64/fcmp.ll

Lines changed: 51 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,8 +1145,6 @@ define <7 x half> @v7f16_half(<7 x half> %a, <7 x half> %b, <7 x half> %d, <7 x
11451145
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
11461146
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
11471147
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
1148-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1149-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
11501148
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
11511149
; CHECK-SD-NOFP16-NEXT: ret
11521150
;
@@ -1275,8 +1273,6 @@ define <8 x half> @v8f16_half(<8 x half> %a, <8 x half> %b, <8 x half> %d, <8 x
12751273
; CHECK-SD-NOFP16-NEXT: fcmgt v4.4s, v5.4s, v4.4s
12761274
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
12771275
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v4.8h
1278-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1279-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
12801276
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v2.16b, v3.16b
12811277
; CHECK-SD-NOFP16-NEXT: ret
12821278
;
@@ -1328,10 +1324,6 @@ define <16 x half> @v16f16_half(<16 x half> %a, <16 x half> %b, <16 x half> %d,
13281324
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
13291325
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v16.8h
13301326
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v3.8h
1331-
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
1332-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1333-
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
1334-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
13351327
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v7.16b
13361328
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v6.16b
13371329
; CHECK-SD-NOFP16-NEXT: ret
@@ -1384,45 +1376,41 @@ entry:
13841376
define <7 x i32> @v7f16_i32(<7 x half> %a, <7 x half> %b, <7 x i32> %d, <7 x i32> %e) {
13851377
; CHECK-SD-NOFP16-LABEL: v7f16_i32:
13861378
; CHECK-SD-NOFP16: // %bb.0: // %entry
1387-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v0.8h
1388-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v1.8h
1379+
; CHECK-SD-NOFP16-NEXT: fmov s2, w0
1380+
; CHECK-SD-NOFP16-NEXT: fmov s4, w7
13891381
; CHECK-SD-NOFP16-NEXT: mov x8, sp
1390-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1391-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1392-
; CHECK-SD-NOFP16-NEXT: ldr s4, [sp, #24]
1393-
; CHECK-SD-NOFP16-NEXT: add x9, sp, #32
1394-
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x9]
1395-
; CHECK-SD-NOFP16-NEXT: add x9, sp, #16
1396-
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v3.4s, v2.4s
1397-
; CHECK-SD-NOFP16-NEXT: fmov s3, w4
1382+
; CHECK-SD-NOFP16-NEXT: fmov s5, w4
1383+
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1384+
; CHECK-SD-NOFP16-NEXT: ldr s3, [sp, #24]
1385+
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
1386+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1387+
; CHECK-SD-NOFP16-NEXT: add x9, sp, #8
1388+
; CHECK-SD-NOFP16-NEXT: mov v2.s[1], w1
1389+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[1], [x8]
1390+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1391+
; CHECK-SD-NOFP16-NEXT: mov v5.s[1], w5
1392+
; CHECK-SD-NOFP16-NEXT: add x8, sp, #32
1393+
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[1], [x8]
1394+
; CHECK-SD-NOFP16-NEXT: add x8, sp, #16
1395+
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
1396+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x9]
1397+
; CHECK-SD-NOFP16-NEXT: add x9, sp, #40
1398+
; CHECK-SD-NOFP16-NEXT: mov v2.s[2], w2
13981399
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
1399-
; CHECK-SD-NOFP16-NEXT: fmov s1, w0
1400-
; CHECK-SD-NOFP16-NEXT: mov v3.s[1], w5
1401-
; CHECK-SD-NOFP16-NEXT: mov v1.s[1], w1
1402-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v2.8h
1403-
; CHECK-SD-NOFP16-NEXT: fmov s2, w7
1404-
; CHECK-SD-NOFP16-NEXT: mov v3.s[2], w6
1405-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[1], [x8]
1406-
; CHECK-SD-NOFP16-NEXT: mov v1.s[2], w2
1407-
; CHECK-SD-NOFP16-NEXT: add x8, sp, #8
1408-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1409-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[2], [x8]
1410-
; CHECK-SD-NOFP16-NEXT: add x8, sp, #40
1411-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1412-
; CHECK-SD-NOFP16-NEXT: mov v1.s[3], w3
1413-
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[2], [x8]
1414-
; CHECK-SD-NOFP16-NEXT: ld1 { v2.s }[3], [x9]
1415-
; CHECK-SD-NOFP16-NEXT: sshll v5.4s, v0.4h, #0
1416-
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
1417-
; CHECK-SD-NOFP16-NEXT: bif v1.16b, v2.16b, v5.16b
1418-
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v3.16b, v4.16b
1400+
; CHECK-SD-NOFP16-NEXT: mov v5.s[2], w6
1401+
; CHECK-SD-NOFP16-NEXT: ld1 { v3.s }[2], [x9]
1402+
; CHECK-SD-NOFP16-NEXT: ld1 { v4.s }[3], [x8]
1403+
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v6.16b
1404+
; CHECK-SD-NOFP16-NEXT: mov v2.s[3], w3
1405+
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v5.16b, v3.16b
1406+
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v2.16b, v4.16b
1407+
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
1408+
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
1409+
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
14191410
; CHECK-SD-NOFP16-NEXT: mov w1, v1.s[1]
14201411
; CHECK-SD-NOFP16-NEXT: mov w2, v1.s[2]
14211412
; CHECK-SD-NOFP16-NEXT: mov w3, v1.s[3]
1422-
; CHECK-SD-NOFP16-NEXT: mov w5, v0.s[1]
1423-
; CHECK-SD-NOFP16-NEXT: mov w6, v0.s[2]
14241413
; CHECK-SD-NOFP16-NEXT: fmov w0, s1
1425-
; CHECK-SD-NOFP16-NEXT: fmov w4, s0
14261414
; CHECK-SD-NOFP16-NEXT: ret
14271415
;
14281416
; CHECK-SD-FP16-LABEL: v7f16_i32:
@@ -1630,17 +1618,12 @@ entry:
16301618
define <8 x i32> @v8f16_i32(<8 x half> %a, <8 x half> %b, <8 x i32> %d, <8 x i32> %e) {
16311619
; CHECK-SD-NOFP16-LABEL: v8f16_i32:
16321620
; CHECK-SD-NOFP16: // %bb.0: // %entry
1633-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v6.4s, v0.8h
1634-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v7.4s, v1.8h
1635-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1636-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1621+
; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v0.4h
1622+
; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
1623+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1624+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
16371625
; CHECK-SD-NOFP16-NEXT: fcmgt v6.4s, v7.4s, v6.4s
16381626
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v1.4s, v0.4s
1639-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v6.8h
1640-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1641-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1642-
; CHECK-SD-NOFP16-NEXT: sshll v6.4s, v0.4h, #0
1643-
; CHECK-SD-NOFP16-NEXT: sshll2 v0.4s, v0.8h, #0
16441627
; CHECK-SD-NOFP16-NEXT: mov v1.16b, v0.16b
16451628
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v6.16b
16461629
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v3.16b, v5.16b
@@ -1694,37 +1677,24 @@ entry:
16941677
define <16 x i32> @v16f16_i32(<16 x half> %a, <16 x half> %b, <16 x i32> %d, <16 x i32> %e) {
16951678
; CHECK-SD-NOFP16-LABEL: v16f16_i32:
16961679
; CHECK-SD-NOFP16: // %bb.0: // %entry
1697-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v17.4s, v0.8h
1698-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v18.4s, v2.8h
1699-
; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
1700-
; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v2.4h
1701-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v16.4s, v1.8h
1702-
; CHECK-SD-NOFP16-NEXT: fcvtl2 v19.4s, v3.8h
1703-
; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
1704-
; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v3.4h
1705-
; CHECK-SD-NOFP16-NEXT: fcmgt v17.4s, v18.4s, v17.4s
1706-
; CHECK-SD-NOFP16-NEXT: fcmgt v0.4s, v2.4s, v0.4s
1707-
; CHECK-SD-NOFP16-NEXT: fcmgt v2.4s, v19.4s, v16.4s
1708-
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v3.4s, v1.4s
1709-
; CHECK-SD-NOFP16-NEXT: ldp q18, q19, [sp, #32]
1710-
; CHECK-SD-NOFP16-NEXT: uzp1 v0.8h, v0.8h, v17.8h
1711-
; CHECK-SD-NOFP16-NEXT: uzp1 v1.8h, v1.8h, v2.8h
1712-
; CHECK-SD-NOFP16-NEXT: ldp q2, q20, [sp]
1713-
; CHECK-SD-NOFP16-NEXT: shl v0.8h, v0.8h, #15
1714-
; CHECK-SD-NOFP16-NEXT: shl v1.8h, v1.8h, #15
1715-
; CHECK-SD-NOFP16-NEXT: cmlt v0.8h, v0.8h, #0
1716-
; CHECK-SD-NOFP16-NEXT: cmlt v1.8h, v1.8h, #0
1717-
; CHECK-SD-NOFP16-NEXT: sshll v3.4s, v0.4h, #0
1718-
; CHECK-SD-NOFP16-NEXT: sshll v16.4s, v1.4h, #0
1719-
; CHECK-SD-NOFP16-NEXT: sshll2 v17.4s, v1.8h, #0
1720-
; CHECK-SD-NOFP16-NEXT: sshll2 v1.4s, v0.8h, #0
1721-
; CHECK-SD-NOFP16-NEXT: mov v0.16b, v3.16b
1722-
; CHECK-SD-NOFP16-NEXT: mov v3.16b, v17.16b
1723-
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v20.16b
1724-
; CHECK-SD-NOFP16-NEXT: bsl v0.16b, v4.16b, v2.16b
1725-
; CHECK-SD-NOFP16-NEXT: mov v2.16b, v16.16b
1726-
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v19.16b
1727-
; CHECK-SD-NOFP16-NEXT: bsl v2.16b, v6.16b, v18.16b
1680+
; CHECK-SD-NOFP16-NEXT: fcvtl v16.4s, v1.4h
1681+
; CHECK-SD-NOFP16-NEXT: fcvtl v17.4s, v3.4h
1682+
; CHECK-SD-NOFP16-NEXT: fcvtl v18.4s, v0.4h
1683+
; CHECK-SD-NOFP16-NEXT: fcvtl v19.4s, v2.4h
1684+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
1685+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
1686+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
1687+
; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
1688+
; CHECK-SD-NOFP16-NEXT: fcmgt v16.4s, v17.4s, v16.4s
1689+
; CHECK-SD-NOFP16-NEXT: fcmgt v18.4s, v19.4s, v18.4s
1690+
; CHECK-SD-NOFP16-NEXT: fcmgt v3.4s, v3.4s, v1.4s
1691+
; CHECK-SD-NOFP16-NEXT: fcmgt v1.4s, v2.4s, v0.4s
1692+
; CHECK-SD-NOFP16-NEXT: ldp q0, q19, [sp]
1693+
; CHECK-SD-NOFP16-NEXT: ldp q2, q17, [sp, #32]
1694+
; CHECK-SD-NOFP16-NEXT: bit v0.16b, v4.16b, v18.16b
1695+
; CHECK-SD-NOFP16-NEXT: bsl v1.16b, v5.16b, v19.16b
1696+
; CHECK-SD-NOFP16-NEXT: bsl v3.16b, v7.16b, v17.16b
1697+
; CHECK-SD-NOFP16-NEXT: bit v2.16b, v6.16b, v16.16b
17281698
; CHECK-SD-NOFP16-NEXT: ret
17291699
;
17301700
; CHECK-SD-FP16-LABEL: v16f16_i32:

0 commit comments

Comments
 (0)