Skip to content

Commit 819fcd8

Browse files
committed
[X86] Added optimizations for fp to signed & unsigned i32 conversions.
Extends lowerFPToIntToFP to support i32 conversions on both VLX + DQ targets and DQ-only targets (AVX512DQ without AVX512VL), and updates the affected test cases.
1 parent 1eb12e6 commit 819fcd8

File tree

4 files changed

+68
-32
lines changed

4 files changed

+68
-32
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19933,7 +19933,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
1993319933
// See if we have 128-bit vector cast instructions for this type of cast.
1993419934
// We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
1993519935
if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
19936-
!(IntVT == MVT::i32 || (IntVT == MVT::i64 && Subtarget.hasDQI())))
19936+
(IntVT != MVT::i32 && IntVT != MVT::i64))
1993719937
return SDValue();
1993819938

1993919939
unsigned SrcSize = SrcVT.getSizeInBits();
@@ -19943,7 +19943,7 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
1994319943
unsigned Width = 128;
1994419944
bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
1994519945

19946-
if (Subtarget.hasVLX() && IntVT == MVT::i64) {
19946+
if (Subtarget.hasVLX() && Subtarget.hasDQI()) {
1994719947
// AVX512DQ+VLX
1994819948
if (IsUnsigned) {
1994919949
ToIntOpcode =
@@ -19956,14 +19956,14 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
1995619956
ToFPOpcode =
1995719957
IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
1995819958
}
19959-
} else if (IntVT == MVT::i64) {
19960-
// Need to extend width for AVX512DQ without AVX512VL
19959+
} else if (Subtarget.hasDQI()) {
19960+
// Need to extend width for AVX512DQ without AVX512VL.
1996119961
Width = 512;
1996219962
ToIntOpcode = CastToInt.getOpcode();
1996319963
ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
1996419964
} else {
19965-
// SSE2
19966-
if (IsUnsigned)
19965+
// SSE2 can only perform f64/f32 <-> i32 signed.
19966+
if (IsUnsigned || IntVT == MVT::i64)
1996719967
return SDValue();
1996819968
ToIntOpcode =
1996919969
SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
@@ -19972,18 +19972,19 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
1997219972
}
1997319973

1997419974
MVT VecSrcVT, VecIntVT, VecVT;
19975-
unsigned NumElts = Width / IntSize;
19976-
VecIntVT = MVT::getVectorVT(IntVT, NumElts);
19975+
unsigned NumElts;
1997719976
unsigned SrcElts, VTElts;
19978-
// vcvttps2qq cannot convert v16f32 <-> v8i64
19979-
if (IntVT == MVT::i64 && Width == 512) {
19977+
// Some conversions are only legal with uniform vector sizes on AVX512DQ.
19978+
if (Width == 512) {
19979+
NumElts = std::min(Width / IntSize, Width / SrcSize);
1998019980
SrcElts = NumElts;
1998119981
VTElts = NumElts;
1998219982
} else {
19983+
NumElts = Width / IntSize;
1998319984
SrcElts = Width / SrcSize;
1998419985
VTElts = Width / VTSize;
1998519986
}
19986-
19987+
VecIntVT = MVT::getVectorVT(IntVT, NumElts);
1998719988
VecSrcVT = MVT::getVectorVT(SrcVT, SrcElts);
1998819989
VecVT = MVT::getVectorVT(VT, VTElts);
1998919990
// sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0

llvm/test/CodeGen/X86/fp-int-fp-cvt.ll

Lines changed: 50 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,26 @@ define double @scvtf64_i32(double %a0) {
1616
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
1717
; SSE-NEXT: retq
1818
;
19-
; AVX-LABEL: scvtf64_i32:
20-
; AVX: # %bb.0:
21-
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
22-
; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
23-
; AVX-NEXT: retq
19+
; AVX2-LABEL: scvtf64_i32:
20+
; AVX2: # %bb.0:
21+
; AVX2-NEXT: vcvttpd2dq %xmm0, %xmm0
22+
; AVX2-NEXT: vcvtdq2pd %xmm0, %xmm0
23+
; AVX2-NEXT: retq
24+
;
25+
; AVX512-VL-LABEL: scvtf64_i32:
26+
; AVX512-VL: # %bb.0:
27+
; AVX512-VL-NEXT: vcvttpd2dq %xmm0, %xmm0
28+
; AVX512-VL-NEXT: vcvtdq2pd %xmm0, %xmm0
29+
; AVX512-VL-NEXT: retq
30+
;
31+
; AVX512-NOVL-LABEL: scvtf64_i32:
32+
; AVX512-NOVL: # %bb.0:
33+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
34+
; AVX512-NOVL-NEXT: vcvttpd2dq %zmm0, %ymm0
35+
; AVX512-NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0
36+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
37+
; AVX512-NOVL-NEXT: vzeroupper
38+
; AVX512-NOVL-NEXT: retq
2439
%ii = fptosi double %a0 to i32
2540
%ff = sitofp i32 %ii to double
2641
ret double %ff
@@ -129,11 +144,20 @@ define double @ucvtf64_i32(double %a0) {
129144
; AVX2-NEXT: vcvtsi2sd %rax, %xmm15, %xmm0
130145
; AVX2-NEXT: retq
131146
;
132-
; AVX512-LABEL: ucvtf64_i32:
133-
; AVX512: # %bb.0:
134-
; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
135-
; AVX512-NEXT: vcvtusi2sd %eax, %xmm15, %xmm0
136-
; AVX512-NEXT: retq
147+
; AVX512-VL-LABEL: ucvtf64_i32:
148+
; AVX512-VL: # %bb.0:
149+
; AVX512-VL-NEXT: vcvttpd2udq %xmm0, %xmm0
150+
; AVX512-VL-NEXT: vcvtudq2pd %xmm0, %xmm0
151+
; AVX512-VL-NEXT: retq
152+
;
153+
; AVX512-NOVL-LABEL: ucvtf64_i32:
154+
; AVX512-NOVL: # %bb.0:
155+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
156+
; AVX512-NOVL-NEXT: vcvttpd2udq %zmm0, %ymm0
157+
; AVX512-NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
158+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
159+
; AVX512-NOVL-NEXT: vzeroupper
160+
; AVX512-NOVL-NEXT: retq
137161
%ii = fptoui double %a0 to i32
138162
%ff = uitofp i32 %ii to double
139163
ret double %ff
@@ -208,11 +232,20 @@ define float @ucvtf32_i32(float %a0) {
208232
; AVX2-NEXT: vcvtsi2ss %rax, %xmm15, %xmm0
209233
; AVX2-NEXT: retq
210234
;
211-
; AVX512-LABEL: ucvtf32_i32:
212-
; AVX512: # %bb.0:
213-
; AVX512-NEXT: vcvttss2usi %xmm0, %eax
214-
; AVX512-NEXT: vcvtusi2ss %eax, %xmm15, %xmm0
215-
; AVX512-NEXT: retq
235+
; AVX512-VL-LABEL: ucvtf32_i32:
236+
; AVX512-VL: # %bb.0:
237+
; AVX512-VL-NEXT: vcvttps2udq %xmm0, %xmm0
238+
; AVX512-VL-NEXT: vcvtudq2ps %xmm0, %xmm0
239+
; AVX512-VL-NEXT: retq
240+
;
241+
; AVX512-NOVL-LABEL: ucvtf32_i32:
242+
; AVX512-NOVL: # %bb.0:
243+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
244+
; AVX512-NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
245+
; AVX512-NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
246+
; AVX512-NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
247+
; AVX512-NOVL-NEXT: vzeroupper
248+
; AVX512-NOVL-NEXT: retq
216249
%ii = fptoui float %a0 to i32
217250
%ff = uitofp i32 %ii to float
218251
ret float %ff
@@ -283,3 +316,5 @@ define float @ucvtf32_i64(float %a0) {
283316
%ff = uitofp i64 %ii to float
284317
ret float %ff
285318
}
319+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
320+
; AVX512: {{.*}}

llvm/test/CodeGen/X86/isint.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,8 +237,8 @@ define i32 @isuint_return(double %d) nounwind {
237237
;
238238
; AVX512VL-LABEL: isuint_return:
239239
; AVX512VL: # %bb.0:
240-
; AVX512VL-NEXT: vcvttsd2usi %xmm0, %eax
241-
; AVX512VL-NEXT: vcvtusi2sd %eax, %xmm15, %xmm1
240+
; AVX512VL-NEXT: vcvttpd2udq %xmm0, %xmm1
241+
; AVX512VL-NEXT: vcvtudq2pd %xmm1, %xmm1
242242
; AVX512VL-NEXT: vcmpeqsd %xmm1, %xmm0, %k0
243243
; AVX512VL-NEXT: kmovw %k0, %eax
244244
; AVX512VL-NEXT: retq

llvm/test/CodeGen/X86/setoeq.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ define zeroext i8 @oeq_f64_u32(double %x) nounwind readnone {
8686
; AVX512-LABEL: oeq_f64_u32:
8787
; AVX512: # %bb.0: # %entry
8888
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
89-
; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
90-
; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
89+
; AVX512-NEXT: vcvttpd2udq %xmm0, %xmm1
90+
; AVX512-NEXT: vcvtudq2pd %xmm1, %xmm1
9191
; AVX512-NEXT: vcmpeqsd %xmm0, %xmm1, %k0
9292
; AVX512-NEXT: kmovd %k0, %eax
9393
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -350,8 +350,8 @@ define zeroext i8 @une_f64_u32(double %x) nounwind readnone {
350350
; AVX512-LABEL: une_f64_u32:
351351
; AVX512: # %bb.0: # %entry
352352
; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
353-
; AVX512-NEXT: vcvttsd2usi %xmm0, %eax
354-
; AVX512-NEXT: vcvtusi2sd %eax, %xmm7, %xmm1
353+
; AVX512-NEXT: vcvttpd2udq %xmm0, %xmm1
354+
; AVX512-NEXT: vcvtudq2pd %xmm1, %xmm1
355355
; AVX512-NEXT: vcmpneqsd %xmm0, %xmm1, %k0
356356
; AVX512-NEXT: kmovd %k0, %eax
357357
; AVX512-NEXT: # kill: def $al killed $al killed $eax

0 commit comments

Comments
 (0)