diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index bae223243b3dc..520284d1d7a48 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (auto KindCost = Entry->Cost[CostKind]) return *KindCost; } + + if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) || + (ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) { + // fp16 conversions not covered by any table entries require a libcall. + // Return a large (arbitrary) number to model this. + return InstructionCost(64); + } } // Fall back to legalized types. @@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, TTI::CastContextHint::None, CostKind); } - if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) { - // Conversion requires a libcall. - return InstructionCost::getInvalid(); - } - // TODO: Allow non-throughput costs that aren't binary. auto AdjustCost = [&CostKind](InstructionCost Cost, InstructionCost N = 1) -> InstructionCost { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll index bcea147d724f5..f23043f0c47f4 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll @@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) { ; ; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16( ; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] { -; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8 -; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8 -; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4 -; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half> -; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4 -; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half> -; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2 -; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2 +; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4 +; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half> +; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2 ; CHECK-F16C-NEXT: ret void ; ; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(