Skip to content

Commit cf29bc4

Browse files
committed
[X86] Combine LRINT/LLRINT and TRUNC when nuw/nsw
Try to improve performance after llvm#125848
1 parent 1930524 commit cf29bc4

File tree

3 files changed

+125
-0
lines changed

3 files changed

+125
-0
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53919,6 +53919,11 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
5391953919
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
5392053920
}
5392153921

53922+
if ((N->getFlags().hasNoUnsignedWrap() || N->getFlags().hasNoSignedWrap()) &&
53923+
(Src.getOpcode() == ISD::LRINT || Src.getOpcode() == ISD::LLRINT) &&
53924+
VT.getScalarType() == MVT::i32 && Src.hasOneUse())
53925+
return DAG.getNode(ISD::LRINT, DL, VT, Src.getOperand(0));
53926+
5392253927
return SDValue();
5392353928
}
5392453929

llvm/test/CodeGen/X86/llrint-conv.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,94 @@ entry:
183183
ret i64 %0
184184
}
185185

186+
define i32 @combine_f32_trunc(float %x) nounwind {
187+
; SSE-LABEL: combine_f32_trunc:
188+
; SSE: # %bb.0: # %entry
189+
; SSE-NEXT: cvtss2si %xmm0, %eax
190+
; SSE-NEXT: retq
191+
;
192+
; AVX-LABEL: combine_f32_trunc:
193+
; AVX: # %bb.0: # %entry
194+
; AVX-NEXT: vcvtss2si %xmm0, %eax
195+
; AVX-NEXT: retq
196+
; X86-NOSSE-LABEL: combine_f32_trunc:
197+
; X86-NOSSE: # %bb.0: # %entry
198+
; X86-NOSSE-NEXT: pushl %eax
199+
; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp)
200+
; X86-NOSSE-NEXT: fistpl (%esp)
201+
; X86-NOSSE-NEXT: movl (%esp), %eax
202+
; X86-NOSSE-NEXT: popl %ecx
203+
; X86-NOSSE-NEXT: retl
204+
;
205+
; X86-SSE2-LABEL: combine_f32_trunc:
206+
; X86-SSE2: # %bb.0: # %entry
207+
; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax
208+
; X86-SSE2-NEXT: retl
209+
;
210+
; X86-AVX-LABEL: combine_f32_trunc:
211+
; X86-AVX: # %bb.0: # %entry
212+
; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax
213+
; X86-AVX-NEXT: retl
214+
;
215+
; X64-SSE-LABEL: combine_f32_trunc:
216+
; X64-SSE: # %bb.0: # %entry
217+
; X64-SSE-NEXT: cvtss2si %xmm0, %eax
218+
; X64-SSE-NEXT: retq
219+
;
220+
; X64-AVX-LABEL: combine_f32_trunc:
221+
; X64-AVX: # %bb.0: # %entry
222+
; X64-AVX-NEXT: vcvtss2si %xmm0, %eax
223+
; X64-AVX-NEXT: retq
224+
entry:
225+
%0 = tail call i64 @llvm.llrint.f32(float %x)
226+
%1 = trunc nsw i64 %0 to i32
227+
ret i32 %1
228+
}
229+
230+
define i32 @combine_f64_trunc(double %x) nounwind {
231+
; SSE-LABEL: combine_f64_trunc:
232+
; SSE: # %bb.0: # %entry
233+
; SSE-NEXT: cvtsd2si %xmm0, %eax
234+
; SSE-NEXT: retq
235+
;
236+
; AVX-LABEL: combine_f64_trunc:
237+
; AVX: # %bb.0: # %entry
238+
; AVX-NEXT: vcvtsd2si %xmm0, %eax
239+
; AVX-NEXT: retq
240+
; X86-NOSSE-LABEL: combine_f64_trunc:
241+
; X86-NOSSE: # %bb.0: # %entry
242+
; X86-NOSSE-NEXT: pushl %eax
243+
; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp)
244+
; X86-NOSSE-NEXT: fistpl (%esp)
245+
; X86-NOSSE-NEXT: movl (%esp), %eax
246+
; X86-NOSSE-NEXT: popl %ecx
247+
; X86-NOSSE-NEXT: retl
248+
;
249+
; X86-SSE2-LABEL: combine_f64_trunc:
250+
; X86-SSE2: # %bb.0: # %entry
251+
; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax
252+
; X86-SSE2-NEXT: retl
253+
;
254+
; X86-AVX-LABEL: combine_f64_trunc:
255+
; X86-AVX: # %bb.0: # %entry
256+
; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax
257+
; X86-AVX-NEXT: retl
258+
;
259+
; X64-SSE-LABEL: combine_f64_trunc:
260+
; X64-SSE: # %bb.0: # %entry
261+
; X64-SSE-NEXT: cvtsd2si %xmm0, %eax
262+
; X64-SSE-NEXT: retq
263+
;
264+
; X64-AVX-LABEL: combine_f64_trunc:
265+
; X64-AVX: # %bb.0: # %entry
266+
; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax
267+
; X64-AVX-NEXT: retq
268+
entry:
269+
%0 = tail call i64 @llvm.llrint.f64(double %x)
270+
%1 = trunc nuw i64 %0 to i32
271+
ret i32 %1
272+
}
273+
186274
declare i64 @llvm.llrint.f32(float) nounwind readnone
187275
declare i64 @llvm.llrint.f64(double) nounwind readnone
188276
declare i64 @llvm.llrint.f80(x86_fp80) nounwind readnone

llvm/test/CodeGen/X86/lrint-conv-i64.ll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,38 @@ entry:
6363
ret i32 %1
6464
}
6565

66+
define i32 @combine_f32_trunc(float %x) {
67+
; SSE-LABEL: combine_f32_trunc:
68+
; SSE: # %bb.0: # %entry
69+
; SSE-NEXT: cvtss2si %xmm0, %eax
70+
; SSE-NEXT: retq
71+
;
72+
; AVX-LABEL: combine_f32_trunc:
73+
; AVX: # %bb.0: # %entry
74+
; AVX-NEXT: vcvtss2si %xmm0, %eax
75+
; AVX-NEXT: retq
76+
entry:
77+
%0 = tail call i64 @llvm.lrint.i64.f32(float %x)
78+
%1 = trunc nuw i64 %0 to i32
79+
ret i32 %1
80+
}
81+
82+
define i32 @combine_f64_trunc(double %x) {
83+
; SSE-LABEL: combine_f64_trunc:
84+
; SSE: # %bb.0: # %entry
85+
; SSE-NEXT: cvtsd2si %xmm0, %eax
86+
; SSE-NEXT: retq
87+
;
88+
; AVX-LABEL: combine_f64_trunc:
89+
; AVX: # %bb.0: # %entry
90+
; AVX-NEXT: vcvtsd2si %xmm0, %eax
91+
; AVX-NEXT: retq
92+
entry:
93+
%0 = tail call i64 @llvm.lrint.i64.f64(double %x)
94+
%1 = trunc nsw i64 %0 to i32
95+
ret i32 %1
96+
}
97+
6698
declare i64 @llvm.lrint.i64.f32(float) nounwind readnone
6799
declare i64 @llvm.lrint.i64.f64(double) nounwind readnone
68100
declare i64 @llvm.lrint.i64.f80(x86_fp80) nounwind readnone

0 commit comments

Comments
 (0)