diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8c28985c8e8e7..20227a181788a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2685,6 +2685,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
                        ISD::ZERO_EXTEND_VECTOR_INREG,
                        ISD::SINT_TO_FP,
                        ISD::UINT_TO_FP,
+                       ISD::FP_TO_SINT,
                        ISD::STRICT_SINT_TO_FP,
                        ISD::STRICT_UINT_TO_FP,
                        ISD::FP_TO_SINT_SAT,
@@ -56400,6 +56401,17 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
+                               const X86Subtarget &Subtarget) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+  if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::FRINT &&
+      VT.getScalarType() == MVT::i32 && Src.hasOneUse())
+    return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
+
+  return SDValue();
+}
+
 // Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
 static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
@@ -59425,6 +59437,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::UINT_TO_FP:
   case ISD::STRICT_UINT_TO_FP:
     return combineUIntToFP(N, DAG, Subtarget);
+  case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
   case ISD::LRINT:
   case ISD::LLRINT:         return combineLRINT_LLRINT(N, DAG, Subtarget);
   case ISD::FADD:
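(reviewer note, not part of the patch) Why the fold is sound: ISD::FRINT rounds
to an integral value using the current rounding mode, and the SSE2 lowering of
i32 ISD::LRINT (CVTSS2SI/CVTSD2SI for scalars, CVTPS2DQ/CVTPD2DQ for the vector
cases) converts using that same mode, so the round-then-convert pair collapses
into a single instruction. The combine is restricted to i32 scalar results
because those are the widths the 32-bit CVT forms produce. A minimal standalone
C++ sketch of the identity being relied on (illustrative only; strictly the
fesetround calls also want FENV_ACCESS or -frounding-math to be honored):

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      const int modes[] = {FE_TONEAREST, FE_DOWNWARD, FE_UPWARD, FE_TOWARDZERO};
      const float vals[] = {2.5f, -2.5f, 3.7f, -0.5f};
      for (int mode : modes) {
        std::fesetround(mode);
        for (float x : vals) {
          // fptosi(frint(x)) as written in the tests below...
          int a = static_cast<int>(std::rint(x));
          // ...and the single-step lrint(x) the combine rewrites it to.
          int b = static_cast<int>(std::lrint(x));
          std::printf("mode=%d x=%+.1f rint+cast=%d lrint=%d\n", mode, x, a, b);
        }
      }
      return 0;
    }

For every mode and input the two columns agree; out-of-range inputs are poison
for fptosi anyway, so the fold has no behavior to preserve there.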
diff --git a/llvm/test/CodeGen/X86/rint-conv.ll b/llvm/test/CodeGen/X86/rint-conv.ll
new file mode 100644
index 0000000000000..f43a2a9baff02
--- /dev/null
+++ b/llvm/test/CodeGen/X86/rint-conv.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
+
+define i32 @combine_f32(float %x) nounwind {
+; X86-LABEL: combine_f32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_f32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtss2si %xmm0, %eax
+; X64-NEXT:    retq
+;
+; AVX-LABEL: combine_f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call float @llvm.rint.f32(float %x)
+  %1 = fptosi float %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64(double %x) nounwind {
+; X86-LABEL: combine_f64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_f64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtsd2si %xmm0, %eax
+; X64-NEXT:    retq
+;
+; AVX-LABEL: combine_f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call double @llvm.rint.f64(double %x)
+  %1 = fptosi double %0 to i32
+  ret i32 %1
+}
+
+define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
+; X86-LABEL: combine_v4f32:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtps2dq %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_v4f32:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtps2dq %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; AVX-LABEL: combine_v4f32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call <4 x float> @llvm.rint.v4f32(<4 x float> %x)
+  %1 = fptosi <4 x float> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
+
+define <2 x i32> @combine_v2f64(<2 x double> %x) nounwind {
+; X86-LABEL: combine_v2f64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_v2f64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X64-NEXT:    retq
+;
+; AVX-LABEL: combine_v2f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtpd2dq %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %x)
+  %1 = fptosi <2 x double> %0 to <2 x i32>
+  ret <2 x i32> %1
+}
+
+define <4 x i32> @combine_v4f64(<4 x double> %x) nounwind {
+; X86-LABEL: combine_v4f64:
+; X86:       # %bb.0: # %entry
+; X86-NEXT:    cvtpd2dq %xmm1, %xmm1
+; X86-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X86-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-NEXT:    retl
+;
+; X64-LABEL: combine_v4f64:
+; X64:       # %bb.0: # %entry
+; X64-NEXT:    cvtpd2dq %xmm1, %xmm1
+; X64-NEXT:    cvtpd2dq %xmm0, %xmm0
+; X64-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X64-NEXT:    retq
+;
+; AVX-LABEL: combine_v4f64:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtpd2dq %ymm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call <4 x double> @llvm.rint.v4f64(<4 x double> %x)
+  %1 = fptosi <4 x double> %0 to <4 x i32>
+  ret <4 x i32> %1
+}
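(reviewer note, not part of the patch) A hypothetical user-level reproducer for
anyone who wants to confirm the codegen delta from C++. With clang -O2 on
x86-64 I would expect via_rint to previously have emitted a separate rounding
step (roundsd with SSE4.1, or a rint libcall with plain SSE2) followed by
cvttsd2si, and with this patch to collapse to the single cvtsd2si that
via_lrint already produced; the function names are illustrative only:

    #include <cmath>

    // (int)rint(x) is exactly the fptosi(frint(x)) pattern the combine targets.
    int via_rint(double x) { return static_cast<int>(std::rint(x)); }

    // lrint(x) already lowered to one cvtsd2si under the current rounding mode.
    int via_lrint(double x) { return static_cast<int>(std::lrint(x)); }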