Skip to content

Commit 2225596

Browse files
committed
[X86] Combine FRINT + FP_TO_SINT to LRINT
Based on Craig's suggestion on llvm#126217
1 parent 083686d commit 2225596

File tree

2 files changed

+119
-0
lines changed

2 files changed

+119
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2684,6 +2684,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
26842684
ISD::ZERO_EXTEND_VECTOR_INREG,
26852685
ISD::SINT_TO_FP,
26862686
ISD::UINT_TO_FP,
2687+
ISD::FP_TO_SINT,
26872688
ISD::STRICT_SINT_TO_FP,
26882689
ISD::STRICT_UINT_TO_FP,
26892690
ISD::FP_TO_SINT_SAT,
@@ -56380,6 +56381,18 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
5638056381
return SDValue();
5638156382
}
5638256383

56384+
/// Try to fold FP_TO_SINT(FRINT(X)) into a single LRINT(X) node.
///
/// The fold is attempted only when the result's scalar type is i32, the FRINT
/// node carries both the no-NaNs and no-Infs flags, and the FRINT has no other
/// users. Returns an empty SDValue when the pattern does not match.
/// NOTE(review): Subtarget is not read in this body.
static SDValue combineFPToSInt(SDNode *N, SelectionDAG &DAG,
56385+
const X86Subtarget &Subtarget) {
56386+
EVT VT = N->getValueType(0);
56387+
SDValue Src = N->getOperand(0);
56388+
if (Src.getOpcode() == ISD::FRINT && VT.getScalarType() == MVT::i32 &&
56389+
Src->getFlags().hasNoNaNs() && Src->getFlags().hasNoInfs() &&
56390+
Src.hasOneUse())
56391+
return DAG.getNode(ISD::LRINT, SDLoc(N), VT, Src.getOperand(0));
56392+
56393+
return SDValue();
56394+
}
56395+
5638356396
// Custom handling for VCVTTPS2QQS/VCVTTPS2UQQS
5638456397
static SDValue combineFP_TO_xINT_SAT(SDNode *N, SelectionDAG &DAG,
5638556398
const X86Subtarget &Subtarget) {
@@ -59405,6 +59418,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
5940559418
case ISD::UINT_TO_FP:
5940659419
case ISD::STRICT_UINT_TO_FP:
5940759420
return combineUIntToFP(N, DAG, Subtarget);
59421+
case ISD::FP_TO_SINT: return combineFPToSInt(N, DAG, Subtarget);
5940859422
case ISD::LRINT:
5940959423
case ISD::LLRINT: return combineLRINT_LLRINT(N, DAG, Subtarget);
5941059424
case ISD::FADD:

llvm/test/CodeGen/X86/rint-conv.ll

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86
3+
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64
4+
5+
; Negative test: rint carries no fast-math flags, so the expected output keeps
; the rintf libcall followed by a truncating cvttss2si.
define i32 @no_combine_f32(float %x) nounwind {
6+
; X86-LABEL: no_combine_f32:
7+
; X86: # %bb.0: # %entry
8+
; X86-NEXT: subl $8, %esp
9+
; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
10+
; X86-NEXT: movss %xmm0, (%esp)
11+
; X86-NEXT: calll rintf
12+
; X86-NEXT: fstps {{[0-9]+}}(%esp)
13+
; X86-NEXT: cvttss2si {{[0-9]+}}(%esp), %eax
14+
; X86-NEXT: addl $8, %esp
15+
; X86-NEXT: retl
16+
;
17+
; X64-LABEL: no_combine_f32:
18+
; X64: # %bb.0: # %entry
19+
; X64-NEXT: pushq %rax
20+
; X64-NEXT: callq rintf@PLT
21+
; X64-NEXT: cvttss2si %xmm0, %eax
22+
; X64-NEXT: popq %rcx
23+
; X64-NEXT: retq
24+
entry:
25+
%0 = tail call float @llvm.rint.f32(float %x)
26+
%1 = fptosi float %0 to i32
27+
ret i32 %1
28+
}
29+
30+
; Positive test: rint is marked nnan ninf, so the expected output is a single
; cvtss2si with no libcall.
define i32 @combine_f32(float %x) nounwind {
31+
; X86-LABEL: combine_f32:
32+
; X86: # %bb.0: # %entry
33+
; X86-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
34+
; X86-NEXT: retl
35+
;
36+
; X64-LABEL: combine_f32:
37+
; X64: # %bb.0: # %entry
38+
; X64-NEXT: cvtss2si %xmm0, %eax
39+
; X64-NEXT: retq
40+
entry:
41+
%0 = tail call nnan ninf float @llvm.rint.f32(float %x)
42+
%1 = fptosi float %0 to i32
43+
ret i32 %1
44+
}
45+
46+
; Negative test: rint carries no fast-math flags, so the expected output keeps
; the rint libcall followed by a truncating cvttsd2si.
define i32 @no_combine_f64(double %x) nounwind {
47+
; X86-LABEL: no_combine_f64:
48+
; X86: # %bb.0: # %entry
49+
; X86-NEXT: pushl %ebp
50+
; X86-NEXT: movl %esp, %ebp
51+
; X86-NEXT: andl $-8, %esp
52+
; X86-NEXT: subl $16, %esp
53+
; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
54+
; X86-NEXT: movsd %xmm0, (%esp)
55+
; X86-NEXT: calll rint
56+
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
57+
; X86-NEXT: cvttsd2si {{[0-9]+}}(%esp), %eax
58+
; X86-NEXT: movl %ebp, %esp
59+
; X86-NEXT: popl %ebp
60+
; X86-NEXT: retl
61+
;
62+
; X64-LABEL: no_combine_f64:
63+
; X64: # %bb.0: # %entry
64+
; X64-NEXT: pushq %rax
65+
; X64-NEXT: callq rint@PLT
66+
; X64-NEXT: cvttsd2si %xmm0, %eax
67+
; X64-NEXT: popq %rcx
68+
; X64-NEXT: retq
69+
entry:
70+
%0 = tail call double @llvm.rint.f64(double %x)
71+
%1 = fptosi double %0 to i32
72+
ret i32 %1
73+
}
74+
75+
; Positive test: rint is marked nnan ninf, so the expected output is a single
; cvtsd2si with no libcall. The intrinsic is spelled with the .f64 suffix so
; that its mangled name matches the double operand and result type.
define i32 @combine_f64(double %x) nounwind {
76+
; X86-LABEL: combine_f64:
77+
; X86: # %bb.0: # %entry
78+
; X86-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
79+
; X86-NEXT: retl
80+
;
81+
; X64-LABEL: combine_f64:
82+
; X64: # %bb.0: # %entry
83+
; X64-NEXT: cvtsd2si %xmm0, %eax
84+
; X64-NEXT: retq
85+
entry:
86+
%0 = tail call nnan ninf double @llvm.rint.f64(double %x)
87+
%1 = fptosi double %0 to i32
88+
ret i32 %1
89+
}
90+
91+
; Positive vector test: rint on <4 x float> is marked nnan ninf, so the
; expected output is a single cvtps2dq.
define <4 x i32> @combine_v4f32(<4 x float> %x) nounwind {
92+
; X86-LABEL: combine_v4f32:
93+
; X86: # %bb.0: # %entry
94+
; X86-NEXT: cvtps2dq %xmm0, %xmm0
95+
; X86-NEXT: retl
96+
;
97+
; X64-LABEL: combine_v4f32:
98+
; X64: # %bb.0: # %entry
99+
; X64-NEXT: cvtps2dq %xmm0, %xmm0
100+
; X64-NEXT: retq
101+
entry:
102+
%0 = tail call nnan ninf <4 x float> @llvm.rint.v4f32(<4 x float> %x)
103+
%1 = fptosi <4 x float> %0 to <4 x i32>
104+
ret <4 x i32> %1
105+
}

0 commit comments

Comments
 (0)