Skip to content

Commit 9ae0843

Browse files
committed
X86: Do not return invalid cost for fp16 conversion
1 parent 4abc357 commit 9ae0843

File tree

2 files changed

+10
-13
lines changed

2 files changed

+10
-13
lines changed

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3068,6 +3068,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30683068
if (auto KindCost = Entry->Cost[CostKind])
30693069
return *KindCost;
30703070
}
3071+
3072+
if ((ISD == ISD::FP_ROUND && SimpleDstTy == MVT::f16) ||
3073+
(ISD == ISD::FP_EXTEND && SimpleSrcTy == MVT::f16)) {
3074+
// fp16 conversions not covered yet require a libcall, return a
3075+
// large (arbitrary) number.
3076+
return InstructionCost(64);
3077+
}
30713078
}
30723079

30733080
// Fall back to legalized types.
@@ -3174,11 +3181,6 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31743181
TTI::CastContextHint::None, CostKind);
31753182
}
31763183

3177-
if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) {
3178-
// Conversion requires a libcall.
3179-
return InstructionCost::getInvalid();
3180-
}
3181-
31823184
// TODO: Allow non-throughput costs that aren't binary.
31833185
auto AdjustCost = [&CostKind](InstructionCost Cost,
31843186
InstructionCost N = 1) -> InstructionCost {

llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -453,14 +453,9 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) {
453453
;
454454
; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16(
455455
; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
456-
; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8
457-
; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8
458-
; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4
459-
; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half>
460-
; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4
461-
; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half>
462-
; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2
463-
; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2
456+
; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
457+
; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
458+
; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2
464459
; CHECK-F16C-NEXT: ret void
465460
;
466461
; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(

0 commit comments

Comments
 (0)