@@ -2296,7 +2296,10 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
22962296 { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, { 1 , 1 , 1 , 1 } },
22972297 { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, { 3 , 1 , 1 , 1 } },
22982298 { ISD::FP_EXTEND, MVT::v16f64, MVT::v16f32, { 4 , 1 , 1 , 1 } }, // 2*vcvtps2pd+vextractf64x4
2299+ { ISD::FP_EXTEND, MVT::v16f32, MVT::v16f16, { 1 , 1 , 1 , 1 } }, // vcvtph2ps
2300+ { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, { 2 , 1 , 1 , 1 } }, // vcvtph2ps+vcvtps2pd
22992301 { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, { 1 , 1 , 1 , 1 } },
2302+ { ISD::FP_ROUND, MVT::v16f16, MVT::v16f32, { 1 , 1 , 1 , 1 } }, // vcvtps2ph
23002303
23012304 { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, { 3 , 1 , 1 , 1 } }, // sext+vpslld+vptestmd
23022305 { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, { 3 , 1 , 1 , 1 } }, // sext+vpslld+vptestmd
@@ -2973,6 +2976,17 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29732976 { ISD::TRUNCATE, MVT::v4i32, MVT::v2i64, { 1 , 1 , 1 , 1 } }, // PSHUFD
29742977 };
29752978
2979+ static const TypeConversionCostKindTblEntry F16ConversionTbl[] = {
2980+ { ISD::FP_ROUND, MVT::f16 , MVT::f32 , { 1 , 1 , 1 , 1 } },
2981+ { ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, { 1 , 1 , 1 , 1 } },
2982+ { ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, { 1 , 1 , 1 , 1 } },
2983+ { ISD::FP_EXTEND, MVT::f32 , MVT::f16 , { 1 , 1 , 1 , 1 } },
2984+ { ISD::FP_EXTEND, MVT::f64 , MVT::f16 , { 2 , 1 , 1 , 1 } }, // vcvtph2ps+vcvtps2pd
2985+ { ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, { 1 , 1 , 1 , 1 } },
2986+ { ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, { 1 , 1 , 1 , 1 } },
2987+ { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, { 2 , 1 , 1 , 1 } }, // vcvtph2ps+vcvtps2pd
2988+ };
2989+
29762990 // Attempt to map directly to (simple) MVT types to let us match custom entries.
29772991 EVT SrcTy = TLI->getValueType (DL, Src);
29782992 EVT DstTy = TLI->getValueType (DL, Dst);
@@ -3034,6 +3048,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30343048 return *KindCost;
30353049 }
30363050
3051+ if (ST->hasF16C ()) {
3052+ if (const auto *Entry = ConvertCostTableLookup (F16ConversionTbl, ISD,
3053+ SimpleDstTy, SimpleSrcTy))
3054+ if (auto KindCost = Entry->Cost [CostKind])
3055+ return *KindCost;
3056+ }
3057+
30373058 if (ST->hasSSE41 ()) {
30383059 if (const auto *Entry = ConvertCostTableLookup (SSE41ConversionTbl, ISD,
30393060 SimpleDstTy, SimpleSrcTy))
@@ -3107,6 +3128,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31073128 if (auto KindCost = Entry->Cost [CostKind])
31083129 return std::max (LTSrc.first , LTDest.first ) * *KindCost;
31093130
3131+ if (ST->hasF16C ()) {
3132+ if (const auto *Entry = ConvertCostTableLookup (F16ConversionTbl, ISD,
3133+ LTDest.second , LTSrc.second ))
3134+ if (auto KindCost = Entry->Cost [CostKind])
3135+ return std::max (LTSrc.first , LTDest.first ) * *KindCost;
3136+ }
3137+
31103138 if (ST->hasSSE41 ())
31113139 if (const auto *Entry = ConvertCostTableLookup (SSE41ConversionTbl, ISD,
31123140 LTDest.second , LTSrc.second ))
@@ -3146,6 +3174,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31463174 TTI::CastContextHint::None, CostKind);
31473175 }
31483176
3177+ if (ISD == ISD::FP_ROUND && LTDest.second .getScalarType () == MVT::f16 ) {
3178+ // Conversion requires a libcall.
3179+ return InstructionCost::getInvalid ();
3180+ }
3181+
31493182 // TODO: Allow non-throughput costs that aren't binary.
31503183 auto AdjustCost = [&CostKind](InstructionCost Cost,
31513184 InstructionCost N = 1 ) -> InstructionCost {
@@ -6923,6 +6956,14 @@ bool X86TTIImpl::isVectorShiftByScalarCheap(Type *Ty) const {
69236956 return true ;
69246957}
69256958
6959+ unsigned X86TTIImpl::getStoreMinimumVF (unsigned VF, Type *ScalarMemTy,
6960+ Type *ScalarValTy) const {
6961+ if (ST->hasF16C () && ScalarMemTy->isHalfTy ()) {
6962+ return 4 ;
6963+ }
6964+ return BaseT::getStoreMinimumVF (VF, ScalarMemTy, ScalarValTy);
6965+ }
6966+
69266967bool X86TTIImpl::isProfitableToSinkOperands (Instruction *I,
69276968 SmallVectorImpl<Use *> &Ops) const {
69286969 using namespace llvm ::PatternMatch;
0 commit comments