diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 744e4e740cb21..9b6bd476a78d4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53919,6 +53919,14 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
   }
 
+  // Fold (trunc nsw (lrint/llrint X)) into an lrint of X that produces i32
+  // elements directly; nsw means the wide result fits in a signed i32.
+  // Skipped for soft-float, where the rounding is done by a library call.
+  if (!Subtarget.useSoftFloat() && N->getFlags().hasNoSignedWrap() &&
+      (Src.getOpcode() == ISD::LRINT || Src.getOpcode() == ISD::LLRINT) &&
+      VT.getScalarType() == MVT::i32 && Src.hasOneUse())
+    return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0));
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll
index 402daf80a15e8..5ce1b68770483 100644
--- a/llvm/test/CodeGen/X86/llrint-conv.ll
+++ b/llvm/test/CodeGen/X86/llrint-conv.ll
@@ -183,6 +183,76 @@ entry:
   ret i64 %0
 }
 
+define i32 @combine_f32_trunc(float %x) nounwind {
+; X86-NOSSE-LABEL: combine_f32_trunc:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: combine_f32_trunc:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtss2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: combine_f32_trunc:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtss2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: combine_f32_trunc:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtss2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: combine_f32_trunc:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtss2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f32(float %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64_trunc(double %x) nounwind {
+; X86-NOSSE-LABEL: combine_f64_trunc:
+; X86-NOSSE:       # %bb.0: # %entry
+; X86-NOSSE-NEXT:    pushl %eax
+; X86-NOSSE-NEXT:    fldl {{[0-9]+}}(%esp)
+; X86-NOSSE-NEXT:    fistpl (%esp)
+; X86-NOSSE-NEXT:    movl (%esp), %eax
+; X86-NOSSE-NEXT:    popl %ecx
+; X86-NOSSE-NEXT:    retl
+;
+; X86-SSE2-LABEL: combine_f64_trunc:
+; X86-SSE2:       # %bb.0: # %entry
+; X86-SSE2-NEXT:    cvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: combine_f64_trunc:
+; X86-AVX:       # %bb.0: # %entry
+; X86-AVX-NEXT:    vcvtsd2si {{[0-9]+}}(%esp), %eax
+; X86-AVX-NEXT:    retl
+;
+; X64-SSE-LABEL: combine_f64_trunc:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    cvtsd2si %xmm0, %eax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: combine_f64_trunc:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.llrint.f64(double %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
 declare i64 @llvm.llrint.f32(float) nounwind readnone
 declare i64 @llvm.llrint.f64(double) nounwind readnone
 declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
index 38fa09085e189..96438c8b55f0b 100644
--- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll
+++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll
@@ -63,6 +63,73 @@ entry:
   ret i32 %1
 }
 
+define i32 @combine_f32_nsw_trunc(float %x) {
+; SSE-LABEL: combine_f32_nsw_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_f32_nsw_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
+;; Check that we don't combine the trunc when it is only nuw (not nsw).
+define i32 @not_combine_f32_nuw_trunc(float %x) {
+; SSE-LABEL: not_combine_f32_nuw_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtss2si %xmm0, %rax
+; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: not_combine_f32_nuw_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtss2si %xmm0, %rax
+; AVX-NEXT:    # kill: def $eax killed $eax killed $rax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f32(float %x)
+  %1 = trunc nuw i64 %0 to i32
+  ret i32 %1
+}
+
+define i32 @combine_f64_trunc(double %x) {
+; SSE-LABEL: combine_f64_trunc:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    cvtsd2si %xmm0, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_f64_trunc:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vcvtsd2si %xmm0, %eax
+; AVX-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %1 = trunc nsw i64 %0 to i32
+  ret i32 %1
+}
+
+;; Soft-float: the combine must not fire, so check "movl %eax, %eax" is present.
+define i64 @zero_upperbits_softfloat(double %x) nounwind "target-features"="+soft-float" {
+; CHECK-LABEL: zero_upperbits_softfloat:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    callq lrint@PLT
+; CHECK-NEXT:    movl %eax, %eax
+; CHECK-NEXT:    popq %rcx
+; CHECK-NEXT:    retq
+entry:
+  %0 = tail call i64 @llvm.lrint.i64.f64(double %x)
+  %1 = trunc nsw i64 %0 to i32
+  %2 = zext i32 %1 to i64
+  ret i64 %2
+}
+
 declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
 declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
 declare i64 @llvm.lrint.i64.f80(x86_fp80) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/vector-llrint.ll b/llvm/test/CodeGen/X86/vector-llrint.ll
index 7017eb60df41d..8414b2ced2892 100644
--- a/llvm/test/CodeGen/X86/vector-llrint.ll
+++ b/llvm/test/CodeGen/X86/vector-llrint.ll
@@ -673,3 +673,23 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) {
   ret <8 x i64> %a
 }
 declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>)
+
+define <4 x i32> @llrint_v4i32_v4f32(<4 x float> %x) {
+; SSE-LABEL: llrint_v4i32_v4f32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cvtps2dq %xmm0, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: llrint_v4i32_v4f32:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+; AVX512DQ-LABEL: llrint_v4i32_v4f32:
+; AVX512DQ:       # %bb.0:
+; AVX512DQ-NEXT:    vcvtps2dq %xmm0, %xmm0
+; AVX512DQ-NEXT:    retq
+  %a = call <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float> %x)
+  %b = trunc nsw <4 x i64> %a to <4 x i32>
+  ret <4 x i32> %b
+}
diff --git a/llvm/test/CodeGen/X86/vector-lrint.ll b/llvm/test/CodeGen/X86/vector-lrint.ll
index 3612205bf1bfa..214fe1e27b139 100644
--- a/llvm/test/CodeGen/X86/vector-lrint.ll
+++ b/llvm/test/CodeGen/X86/vector-lrint.ll
@@ -537,3 +537,28 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) {
   ret <8 x iXLen> %a
 }
 declare <8 x iXLen> @llvm.lrint.v8iXLen.v8f64(<8 x double>)
+
+define <4 x i32> @lrint_v4i32_v4f32(<4 x float> %x) {
+; X86-SSE2-LABEL: lrint_v4i32_v4f32:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    cvtps2dq %xmm0, %xmm0
+; X86-SSE2-NEXT:    retl
+;
+; X86-AVX-LABEL: lrint_v4i32_v4f32:
+; X86-AVX:       # %bb.0:
+; X86-AVX-NEXT:    vcvtps2dq %xmm0, %xmm0
+; X86-AVX-NEXT:    retl
+;
+; X64-AVX-i32-LABEL: lrint_v4i32_v4f32:
+; X64-AVX-i32:       # %bb.0:
+; X64-AVX-i32-NEXT:    vcvtps2dq %xmm0, %xmm0
+; X64-AVX-i32-NEXT:    retq
+;
+; X64-AVX-i64-LABEL: lrint_v4i32_v4f32:
+; X64-AVX-i64:       # %bb.0:
+; X64-AVX-i64-NEXT:    vcvtps2dq %xmm0, %xmm0
+; X64-AVX-i64-NEXT:    retq
+  %a = call <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float> %x)
+  %b = trunc nsw <4 x i64> %a to <4 x i32>
+  ret <4 x i32> %b
+}