Skip to content

Commit 491755c

Browse files
Map vector saturated converts to public intrinsics
1 parent 02b3012 commit 491755c

File tree

6 files changed

+522
-2
lines changed

6 files changed

+522
-2
lines changed

llvm/lib/Target/X86/X86ISelDAGToDAG.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5456,6 +5456,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
54565456
break;
54575457
}
54585458

5459+
case X86ISD::FP_TO_SINT_SAT_CUSTOM:
5460+
case X86ISD::FP_TO_UINT_SAT_CUSTOM:
5461+
if (Subtarget->hasAVX10_2()) {
5462+
bool IsSigned = Node->getOpcode() == X86ISD::FP_TO_SINT_SAT_CUSTOM;
5463+
SDValue Op = Node->getOperand(0);
5464+
EVT VT = Node->getValueType(0);
5465+
EVT OpVT = Op.getValueType();
5466+
MachineSDNode *MachineNode;
5467+
5468+
if (VT == MVT::v4i32 && OpVT == MVT::v4f32) {
5469+
if (IsSigned)
5470+
MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2DQSZ128rr, dl,
5471+
MVT::v4i32, Op);
5472+
else
5473+
MachineNode = CurDAG->getMachineNode(X86::VCVTTPD2UDQSZ128rr, dl,
5474+
MVT::v4i32, Op);
5475+
}
5476+
5477+
if ((VT == MVT::v2i64 && OpVT == MVT::v2f64)) {
5478+
if (IsSigned)
5479+
MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2QQSZ128rr, dl,
5480+
MVT::v2i64, Op);
5481+
else
5482+
MachineNode = CurDAG->getMachineNode(X86::VCVTTPS2UQQSZ128rr, dl,
5483+
MVT::v2i64, Op);
5484+
}
5485+
5486+
SDValue NewNode = SDValue(MachineNode, 0);
5487+
ReplaceNode(Node, NewNode.getNode());
5488+
return;
5489+
}
5490+
break;
5491+
54595492
case X86ISD::ANDNP:
54605493
if (tryVPTERNLOG(Node))
54615494
return;

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 63 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -341,8 +341,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
341341
}
342342
}
343343
if (Subtarget.hasAVX10_2()) {
344-
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Legal);
345-
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Legal);
344+
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v2i32, Custom);
345+
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v2i32, Custom);
346+
for (MVT VT : {MVT::i32, MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
347+
MVT::v4i64, MVT::v8i64}) {
348+
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Legal);
349+
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Legal);
350+
}
346351
if (Subtarget.is64Bit()) {
347352
setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Legal);
348353
setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Legal);
@@ -2656,6 +2661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
26562661
ISD::UINT_TO_FP,
26572662
ISD::STRICT_SINT_TO_FP,
26582663
ISD::STRICT_UINT_TO_FP,
2664+
ISD::FP_TO_SINT_SAT,
2665+
ISD::FP_TO_UINT_SAT,
26592666
ISD::SETCC,
26602667
ISD::MUL,
26612668
ISD::XOR,
@@ -33665,6 +33672,30 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
3366533672
}
3366633673
return;
3366733674
}
33675+
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT: {
  // Custom result-type legalization for a saturating v2f64 -> v2i32
  // conversion. The replacement value pushed to Results is a v4i32 whose low
  // two lanes hold the converted elements. Without AVX10.2 nothing is pushed.
  if (!Subtarget.hasAVX10_2())
    return;

  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT OpVT = Op.getValueType();
  // Operand 1 carries the saturation width as a value-type node.
  SDValue SatWidth = N->getOperand(1);
  EVT SatVT = cast<VTSDNode>(SatWidth)->getVT();

  // Only a full-width (i32) saturation can be mapped onto a single
  // conversion; narrower saturation widths are left to the code after the
  // switch, exactly like every other unhandled type combination.
  if (VT == MVT::v2i32 && OpVT == MVT::v2f64 &&
      SatVT.getScalarSizeInBits() == 32) {
    // Keep the source as f64. Narrowing it to f32 first would round away low
    // mantissa bits (f32 cannot represent every integer above 2^24) and give
    // wrong i32 results. Instead widen to v4f64 with undefined upper lanes
    // and convert with the generic saturating node for v4f64 -> v4i32.
    // NOTE(review): this uses a 256-bit source; a direct 128-bit
    // VCVTTPD2[U]DQS selection would need a node/pattern that accepts a
    // v2f64 operand with a v4i32 result.
    SDValue V4F64 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Op,
                                DAG.getUNDEF(MVT::v2f64));
    Results.push_back(
        DAG.getNode(N->getOpcode(), dl, MVT::v4i32, V4F64, SatWidth));
    return;
  }
  break;
}
3366833699
case ISD::FP_TO_SINT:
3366933700
case ISD::STRICT_FP_TO_SINT:
3367033701
case ISD::FP_TO_UINT:
@@ -34645,6 +34676,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3464534676
NODE_NAME_CASE(VPERMV3)
3464634677
NODE_NAME_CASE(VPERMI)
3464734678
NODE_NAME_CASE(VPTERNLOG)
34679+
NODE_NAME_CASE(FP_TO_SINT_SAT_CUSTOM)
34680+
NODE_NAME_CASE(FP_TO_UINT_SAT_CUSTOM)
3464834681
NODE_NAME_CASE(VFIXUPIMM)
3464934682
NODE_NAME_CASE(VFIXUPIMM_SAE)
3465034683
NODE_NAME_CASE(VFIXUPIMMS)
@@ -56202,6 +56235,32 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
5620256235
return SDValue();
5620356236
}
5620456237

56238+
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
56239+
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
56240+
const X86Subtarget &Subtarget) {
56241+
if (!Subtarget.hasAVX10_2())
56242+
return SDValue();
56243+
56244+
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
56245+
EVT SrcVT = N->getOperand(0).getValueType();
56246+
EVT DstVT = N->getValueType(0);
56247+
SDLoc dl(N);
56248+
56249+
if (SrcVT == MVT::v2f32 && DstVT == MVT::v2i64) {
56250+
// Convert v2f32 to v2f64
56251+
SDValue V2F64 =
56252+
DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, N->getOperand(0));
56253+
56254+
// Select the FP_TO_SINT_SAT_CUSTOM/FP_TO_UINT_SAT_CUSTOM node
56255+
if (IsSigned)
56256+
return DAG.getNode(X86ISD::FP_TO_SINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
56257+
else
56258+
return DAG.getNode(X86ISD::FP_TO_UINT_SAT_CUSTOM, dl, MVT::v2i64, V2F64);
56259+
}
56260+
56261+
return SDValue();
56262+
}
56263+
5620556264
static bool needCarryOrOverflowFlag(SDValue Flags) {
5620656265
assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");
5620756266

@@ -59315,6 +59374,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5931559374
case ISD::INTRINSIC_WO_CHAIN: return combineINTRINSIC_WO_CHAIN(N, DAG, DCI);
5931659375
case ISD::INTRINSIC_W_CHAIN: return combineINTRINSIC_W_CHAIN(N, DAG, DCI);
5931759376
case ISD::INTRINSIC_VOID: return combineINTRINSIC_VOID(N, DAG, DCI);
59377+
case ISD::FP_TO_SINT_SAT:
59378+
case ISD::FP_TO_UINT_SAT: return combineFP_TO_xINT_SAT(N, DAG, Subtarget);
5931859379
// clang-format on
5931959380
}
5932059381

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -908,6 +908,10 @@ namespace llvm {
908908
// Load x87 FPU environment from memory.
909909
FLDENVm,
910910

911+
// Saturating FP-to-integer vector conversion nodes produced by the X86
// handling of ISD::FP_TO_SINT_SAT / ISD::FP_TO_UINT_SAT.
912+
FP_TO_SINT_SAT_CUSTOM,
913+
FP_TO_UINT_SAT_CUSTOM,
914+
911915
/// This instruction implements FP_TO_SINT with the
912916
/// integer destination in memory and a FP reg source. This corresponds
913917
/// to the X86::FIST*m instructions and the rounding mode change stuff. It

llvm/lib/Target/X86/X86InstrAVX10.td

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,62 @@ let Predicates = [HasAVX10_2] in {
831831
// patterns have been disabled with null_frag.
832832
// Patterns VCVTTPD2DQSZ128
833833

834+
// Selection patterns for the generic fp_to_sint_sat / fp_to_uint_sat nodes.
// Every pattern requires the saturation-width operand (the trailing i32/i64)
// to equal the destination element type, and selects the register-register
// form of the corresponding instruction. There is no pattern here for a
// v2f64 source with an i32 result or for a v2f32 source with an i64 result.
//
// VCVTTPD2DQS: f64 -> signed i32 (v4f64 and v8f64 sources).
835+
def : Pat<(v4i32(fp_to_sint_sat(v4f64 VR256X:$src), i32)),
836+
(VCVTTPD2DQSZ256rr VR256X:$src)>;
837+
def : Pat<(v8i32(fp_to_sint_sat(v8f64 VR512:$src), i32)),
838+
(VCVTTPD2DQSZrr VR512:$src)>;
839+
840+
// VCVTTPD2QQS: f64 -> signed i64 (v2f64, v4f64 and v8f64 sources).
841+
def : Pat<(v2i64(fp_to_sint_sat(v2f64 VR128X:$src), i64)),
842+
(VCVTTPD2QQSZ128rr VR128X:$src)>;
843+
def : Pat<(v4i64(fp_to_sint_sat(v4f64 VR256X:$src), i64)),
844+
(VCVTTPD2QQSZ256rr VR256X:$src)>;
845+
def : Pat<(v8i64(fp_to_sint_sat(v8f64 VR512:$src), i64)),
846+
(VCVTTPD2QQSZrr VR512:$src)>;
847+
848+
// VCVTTPD2UDQS: f64 -> unsigned i32 (v4f64 and v8f64 sources).
849+
def : Pat<(v4i32(fp_to_uint_sat(v4f64 VR256X:$src), i32)),
850+
(VCVTTPD2UDQSZ256rr VR256X:$src)>;
851+
def : Pat<(v8i32(fp_to_uint_sat(v8f64 VR512:$src), i32)),
852+
(VCVTTPD2UDQSZrr VR512:$src)>;
853+
854+
// VCVTTPD2UQQS: f64 -> unsigned i64 (v2f64, v4f64 and v8f64 sources).
855+
def : Pat<(v2i64(fp_to_uint_sat(v2f64 VR128X:$src), i64)),
856+
(VCVTTPD2UQQSZ128rr VR128X:$src)>;
857+
def : Pat<(v4i64(fp_to_uint_sat(v4f64 VR256X:$src), i64)),
858+
(VCVTTPD2UQQSZ256rr VR256X:$src)>;
859+
def : Pat<(v8i64(fp_to_uint_sat(v8f64 VR512:$src), i64)),
860+
(VCVTTPD2UQQSZrr VR512:$src)>;
861+
862+
// VCVTTPS2DQS: f32 -> signed i32 (v4f32, v8f32 and v16f32 sources).
863+
def : Pat<(v4i32(fp_to_sint_sat(v4f32 VR128X:$src), i32)),
864+
(VCVTTPS2DQSZ128rr VR128X:$src)>;
865+
def : Pat<(v8i32(fp_to_sint_sat(v8f32 VR256X:$src), i32)),
866+
(VCVTTPS2DQSZ256rr VR256X:$src)>;
867+
def : Pat<(v16i32(fp_to_sint_sat(v16f32 VR512:$src), i32)),
868+
(VCVTTPS2DQSZrr VR512:$src)>;
869+
870+
// VCVTTPS2QQS: f32 -> signed i64 (v4f32 and v8f32 sources; the result is
// twice as wide as the source register).
871+
def : Pat<(v4i64(fp_to_sint_sat(v4f32 VR128X:$src), i64)),
872+
(VCVTTPS2QQSZ256rr VR128X:$src)>;
873+
def : Pat<(v8i64(fp_to_sint_sat(v8f32 VR256X:$src), i64)),
874+
(VCVTTPS2QQSZrr VR256X:$src)>;
875+
876+
// VCVTTPS2UDQS: f32 -> unsigned i32 (v4f32, v8f32 and v16f32 sources).
877+
def : Pat<(v4i32(fp_to_uint_sat(v4f32 VR128X:$src), i32)),
878+
(VCVTTPS2UDQSZ128rr VR128X:$src)>;
879+
def : Pat<(v8i32(fp_to_uint_sat(v8f32 VR256X:$src), i32)),
880+
(VCVTTPS2UDQSZ256rr VR256X:$src)>;
881+
def : Pat<(v16i32(fp_to_uint_sat(v16f32 VR512:$src), i32)),
882+
(VCVTTPS2UDQSZrr VR512:$src)>;
883+
884+
// VCVTTPS2UQQS: f32 -> unsigned i64 (v4f32 and v8f32 sources; the result is
// twice as wide as the source register).
885+
def : Pat<(v4i64(fp_to_uint_sat(v4f32 VR128X:$src), i64)),
886+
(VCVTTPS2UQQSZ256rr VR128X:$src)>;
887+
def : Pat<(v8i64(fp_to_uint_sat(v8f32 VR256X:$src), i64)),
888+
(VCVTTPS2UQQSZrr VR256X:$src)>;
889+
834890
def : Pat<(v4i32 (X86cvttp2sis (v2f64 VR128X:$src))),
835891
(VCVTTPD2DQSZ128rr VR128X:$src)>;
836892
def : Pat<(v4i32 (X86cvttp2sis (loadv2f64 addr:$src))),
Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=i686-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X86
3+
; RUN: llc < %s -mtriple=x86_64-linux -mattr=+avx10.2-512 | FileCheck %s --check-prefix=X64
4+
5+
; VCVTTPD2DQS
6+
define <8 x i32> @test_signed_v8i32_v8f64(<8 x double> %f) nounwind {
7+
; X86-LABEL: test_signed_v8i32_v8f64:
8+
; X86: # %bb.0:
9+
; X86-NEXT: vcvttpd2dqs %zmm0, %ymm0
10+
; X86-NEXT: retl
11+
;
12+
; X64-LABEL: test_signed_v8i32_v8f64:
13+
; X64: # %bb.0:
14+
; X64-NEXT: vcvttpd2dqs %zmm0, %ymm0
15+
; X64-NEXT: retq
16+
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
17+
ret <8 x i32> %x
18+
}
19+
20+
; VCVTTPD2QQS
21+
define <8 x i64> @test_signed_v8i64_v8f64(<8 x double> %f) nounwind {
22+
; X86-LABEL: test_signed_v8i64_v8f64:
23+
; X86: # %bb.0:
24+
; X86-NEXT: vcvttpd2qqs %zmm0, %zmm0
25+
; X86-NEXT: retl
26+
;
27+
; X64-LABEL: test_signed_v8i64_v8f64:
28+
; X64: # %bb.0:
29+
; X64-NEXT: vcvttpd2qqs %zmm0, %zmm0
30+
; X64-NEXT: retq
31+
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f64(<8 x double> %f)
32+
ret <8 x i64> %x
33+
}
34+
35+
; VCVTTPD2UDQS
36+
define <8 x i32> @test_unsigned_v8i32_v8f64(<8 x double> %f) nounwind {
37+
; X86-LABEL: test_unsigned_v8i32_v8f64:
38+
; X86: # %bb.0:
39+
; X86-NEXT: vcvttpd2udqs %zmm0, %ymm0
40+
; X86-NEXT: retl
41+
;
42+
; X64-LABEL: test_unsigned_v8i32_v8f64:
43+
; X64: # %bb.0:
44+
; X64-NEXT: vcvttpd2udqs %zmm0, %ymm0
45+
; X64-NEXT: retq
46+
%x = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f64(<8 x double> %f)
47+
ret <8 x i32> %x
48+
}
49+
50+
; VCVTTPD2UQQS
51+
define <8 x i64> @test_unsigned_v8i64_v8f64(<8 x double> %f) nounwind {
52+
; X86-LABEL: test_unsigned_v8i64_v8f64:
53+
; X86: # %bb.0:
54+
; X86-NEXT: vcvttpd2uqqs %zmm0, %zmm0
55+
; X86-NEXT: retl
56+
;
57+
; X64-LABEL: test_unsigned_v8i64_v8f64:
58+
; X64: # %bb.0:
59+
; X64-NEXT: vcvttpd2uqqs %zmm0, %zmm0
60+
; X64-NEXT: retq
61+
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f64(<8 x double> %f)
62+
ret <8 x i64> %x
63+
}
64+
65+
; VCVTTPS2DQS
66+
define <16 x i32> @test_signed_v16i32_v16f32(<16 x float> %f) nounwind {
67+
; X86-LABEL: test_signed_v16i32_v16f32:
68+
; X86: # %bb.0:
69+
; X86-NEXT: vcvttps2dqs %zmm0, %zmm0
70+
; X86-NEXT: retl
71+
;
72+
; X64-LABEL: test_signed_v16i32_v16f32:
73+
; X64: # %bb.0:
74+
; X64-NEXT: vcvttps2dqs %zmm0, %zmm0
75+
; X64-NEXT: retq
76+
%x = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> %f)
77+
ret <16 x i32> %x
78+
}
79+
80+
; VCVTTPS2UDQS
81+
define <16 x i32> @test_unsigned_v16i32_v16f32(<16 x float> %f) nounwind {
82+
; X86-LABEL: test_unsigned_v16i32_v16f32:
83+
; X86: # %bb.0:
84+
; X86-NEXT: vcvttps2udqs %zmm0, %zmm0
85+
; X86-NEXT: retl
86+
;
87+
; X64-LABEL: test_unsigned_v16i32_v16f32:
88+
; X64: # %bb.0:
89+
; X64-NEXT: vcvttps2udqs %zmm0, %zmm0
90+
; X64-NEXT: retq
91+
%x = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> %f)
92+
ret <16 x i32> %x
93+
}
94+
; VCVTTPS2QQS
95+
define <8 x i64> @test_signed_v8i64_v8f32(<8 x float> %f) nounwind {
96+
; X86-LABEL: test_signed_v8i64_v8f32:
97+
; X86: # %bb.0:
98+
; X86-NEXT: vcvttps2qqs %ymm0, %zmm0
99+
; X86-NEXT: retl
100+
;
101+
; X64-LABEL: test_signed_v8i64_v8f32:
102+
; X64: # %bb.0:
103+
; X64-NEXT: vcvttps2qqs %ymm0, %zmm0
104+
; X64-NEXT: retq
105+
%x = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> %f)
106+
ret <8 x i64> %x
107+
}
108+
109+
; VCVTTPS2UQQS
110+
define <8 x i64> @test_unsigned_v8i64_v8f32(<8 x float> %f) nounwind {
111+
; X86-LABEL: test_unsigned_v8i64_v8f32:
112+
; X86: # %bb.0:
113+
; X86-NEXT: vcvttps2uqqs %ymm0, %zmm0
114+
; X86-NEXT: retl
115+
;
116+
; X64-LABEL: test_unsigned_v8i64_v8f32:
117+
; X64: # %bb.0:
118+
; X64-NEXT: vcvttps2uqqs %ymm0, %zmm0
119+
; X64-NEXT: retq
120+
%x = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> %f)
121+
ret <8 x i64> %x
122+
}

0 commit comments

Comments
 (0)