@@ -19897,21 +19897,50 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, const SDLoc &DL,
1989719897 // See if we have 128-bit vector cast instructions for this type of cast.
1989819898 // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd.
1989919899 if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
19900- IntVT != MVT::i32)
19900+ ( IntVT != MVT::i32 && IntVT != MVT::i64) )
1990119901 return SDValue();
1990219902
1990319903 unsigned SrcSize = SrcVT.getSizeInBits();
1990419904 unsigned IntSize = IntVT.getSizeInBits();
1990519905 unsigned VTSize = VT.getSizeInBits();
19906- MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
19907- MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
19908- MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);
19909-
19910- // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
19911- unsigned ToIntOpcode =
19912- SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
19913- unsigned ToFPOpcode =
19914- IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
19906+ unsigned ToIntOpcode, ToFPOpcode;
19907+ unsigned Width = 128;
19908+ bool IsUnsigned = CastToInt.getOpcode() == ISD::FP_TO_UINT;
19909+
19910+ if (Subtarget.hasAVX512()) {
19911+ if (Subtarget.hasVLX()) {
19912+ // With AVX512VL, FP_TO_UINT/UINT_TO_FP (f32/f64 <-> i32) can be handled at 128 bits; plain AVX512F can handle it as well, but only with Width = 512.
19913+ if (IntVT == MVT::i32) {
19914+ ToIntOpcode = IsUnsigned ? X86ISD::CVTTP2UI : X86ISD::CVTTP2SI;
19915+ ToFPOpcode = IsUnsigned ? X86ISD::CVTUI2P : X86ISD::CVTSI2P;
19916+ } else {
19917+ ToIntOpcode = CastToInt.getOpcode();
19918+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
19919+ }
19920+ } else if (Subtarget.hasDQI() && IntVT == MVT::i64) {
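19921+ // AVX512DQ + AVX512VL can handle f32/f64 <-> i64 for both SINT and UINT. NOTE(review): this branch is only reached when hasVLX() is false and Width stays 128 - confirm the intended feature check.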
19922+ ToIntOpcode = CastToInt.getOpcode();
19923+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
19924+ } else {
19925+ // AVX512F
19926+ Width = 512;
19927+ ToIntOpcode = CastToInt.getOpcode();
19928+ ToFPOpcode = IsUnsigned ? ISD::UINT_TO_FP : ISD::SINT_TO_FP;
19929+ }
19930+ } else {
19931+ if (IntVT != MVT::i32 || IsUnsigned)
19932+ return SDValue();
19933+ // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
19934+ ToIntOpcode =
19935+ SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
19936+ ToFPOpcode =
19937+ IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;
19938+ }
19939+
19940+ MVT VecSrcVT = MVT::getVectorVT(SrcVT, Width / SrcSize);
19941+ MVT VecIntVT = MVT::getVectorVT(IntVT, Width / IntSize);
19942+ MVT VecVT = MVT::getVectorVT(VT, Width / VTSize);
19943+
1991519944
1991619945 // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
1991719946 //
0 commit comments