Skip to content

Commit 84a22ef

Browse files
committed
Override X86TTIImpl::getStoreMinimumVF instead of tweaking codegen tables.
1 parent 4f2a529 commit 84a22ef

File tree

4 files changed

+35
-17
lines changed

4 files changed

+35
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,9 +1714,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
17141714
setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32);
17151715
setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32);
17161716
}
1717-
// trunc+store via vcvtps2ph
1718-
setOperationAction(ISD::STORE, MVT::v4f16, Custom);
1719-
setOperationAction(ISD::STORE, MVT::v8f16, Custom);
17201717
}
17211718

17221719
// This block controls legalization of the mask vector sizes that are
@@ -1787,9 +1784,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
17871784

17881785
for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
17891786
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1790-
1791-
// trunc+store via vcvtps2ph
1792-
setOperationAction(ISD::STORE, MVT::v16f16, Custom);
17931787
}
17941788
if (Subtarget.hasDQI() && Subtarget.hasVLX()) {
17951789
for (MVT VT : {MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {

llvm/lib/Target/X86/X86TargetTransformInfo.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2977,10 +2977,13 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29772977
};
29782978

29792979
static const TypeConversionCostKindTblEntry F16ConversionTbl[] = {
2980-
{ ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, { 1, 1, 1, 1 } }, // vcvtps2ph
2981-
{ ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, { 1, 1, 1, 1 } }, // vcvtps2ph
2982-
{ ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, { 1, 1, 1, 1 } }, // vcvtph2ps
2983-
{ ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, { 1, 1, 1, 1 } }, // vcvtph2ps
2980+
{ ISD::FP_ROUND, MVT::f16, MVT::f32, { 1, 1, 1, 1 } },
2981+
{ ISD::FP_ROUND, MVT::v8f16, MVT::v8f32, { 1, 1, 1, 1 } },
2982+
{ ISD::FP_ROUND, MVT::v4f16, MVT::v4f32, { 1, 1, 1, 1 } },
2983+
{ ISD::FP_EXTEND, MVT::f32, MVT::f16, { 1, 1, 1, 1 } },
2984+
{ ISD::FP_EXTEND, MVT::f64, MVT::f16, { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
2985+
{ ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, { 1, 1, 1, 1 } },
2986+
{ ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, { 1, 1, 1, 1 } },
29842987
{ ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, { 2, 1, 1, 1 } }, // vcvtph2ps+vcvtps2pd
29852988
};
29862989

@@ -3171,6 +3174,11 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31713174
TTI::CastContextHint::None, CostKind);
31723175
}
31733176

3177+
if (ISD == ISD::FP_ROUND && LTDest.second.getScalarType() == MVT::f16) {
3178+
// Conversion requires a libcall.
3179+
return InstructionCost::getInvalid();
3180+
}
3181+
31743182
// TODO: Allow non-throughput costs that aren't binary.
31753183
auto AdjustCost = [&CostKind](InstructionCost Cost,
31763184
InstructionCost N = 1) -> InstructionCost {
@@ -6948,6 +6956,14 @@ bool X86TTIImpl::isVectorShiftByScalarCheap(Type *Ty) const {
69486956
return true;
69496957
}
69506958

6959+
/// Returns the minimum vectorization factor to use for a store whose
/// in-memory scalar type is \p ScalarMemTy and whose stored value has scalar
/// type \p ScalarValTy.
///
/// When the subtarget has F16C and the in-memory element type is half, the
/// minimum is fixed at 4 regardless of \p VF; every other case is forwarded
/// unchanged to the base TTI implementation.
unsigned X86TTIImpl::getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
6960+
Type *ScalarValTy) const {
6961+
// NOTE(review): 4 presumably matches the narrowest F16C vector conversion
// (the v4f32 -> v4f16 FP_ROUND entry in F16ConversionTbl) -- confirm.
if (ST->hasF16C() && ScalarMemTy->isHalfTy()) {
6962+
return 4;
6963+
}
6964+
// Not an F16C half store: defer to the generic implementation.
return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
6965+
}
6966+
69516967
bool X86TTIImpl::isProfitableToSinkOperands(Instruction *I,
69526968
SmallVectorImpl<Use *> &Ops) const {
69536969
using namespace llvm::PatternMatch;

llvm/lib/Target/X86/X86TargetTransformInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,9 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
302302

303303
bool isVectorShiftByScalarCheap(Type *Ty) const;
304304

305+
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
306+
Type *ScalarValTy) const;
307+
305308
private:
306309
bool supportsGather() const;
307310
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,

llvm/test/Transforms/SLPVectorizer/X86/conversion-fp16.ll

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,8 @@ define void @fpext_v4xf16_v4xf64(ptr %s0, ptr %d0) {
123123
ret void
124124
}
125125

126-
define void @fpext_v16xf15_v16xf32(ptr %s0, ptr %d0) {
127-
; CHECK-LABEL: define void @fpext_v16xf15_v16xf32(
126+
define void @fpext_v16xf16_v16xf32(ptr %s0, ptr %d0) {
127+
; CHECK-LABEL: define void @fpext_v16xf16_v16xf32(
128128
; CHECK-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
129129
; CHECK-NEXT: [[S1:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 1
130130
; CHECK-NEXT: [[S2:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 2
@@ -206,7 +206,7 @@ define void @fpext_v16xf15_v16xf32(ptr %s0, ptr %d0) {
206206
; CHECK-NEXT: store float [[E15]], ptr [[D16]], align 8
207207
; CHECK-NEXT: ret void
208208
;
209-
; CHECK-F16C-LABEL: define void @fpext_v16xf15_v16xf32(
209+
; CHECK-F16C-LABEL: define void @fpext_v16xf16_v16xf32(
210210
; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
211211
; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds half, ptr [[S0]], i64 8
212212
; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds float, ptr [[D0]], i64 8
@@ -218,7 +218,7 @@ define void @fpext_v16xf15_v16xf32(ptr %s0, ptr %d0) {
218218
; CHECK-F16C-NEXT: store <8 x float> [[TMP4]], ptr [[D8]], align 8
219219
; CHECK-F16C-NEXT: ret void
220220
;
221-
; CHECK-AVX512-LABEL: define void @fpext_v16xf15_v16xf32(
221+
; CHECK-AVX512-LABEL: define void @fpext_v16xf16_v16xf32(
222222
; CHECK-AVX512-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
223223
; CHECK-AVX512-NEXT: [[TMP1:%.*]] = load <16 x half>, ptr [[S0]], align 2
224224
; CHECK-AVX512-NEXT: [[TMP2:%.*]] = fpext <16 x half> [[TMP1]] to <16 x float>
@@ -453,9 +453,14 @@ define void @fpround_v16xf32_v16xf16(ptr %s0, ptr %d0) {
453453
;
454454
; CHECK-F16C-LABEL: define void @fpround_v16xf32_v16xf16(
455455
; CHECK-F16C-SAME: ptr [[S0:%.*]], ptr [[D0:%.*]]) #[[ATTR0]] {
456-
; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <16 x float>, ptr [[S0]], align 4
457-
; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <16 x float> [[TMP1]] to <16 x half>
458-
; CHECK-F16C-NEXT: store <16 x half> [[TMP2]], ptr [[D0]], align 2
456+
; CHECK-F16C-NEXT: [[S8:%.*]] = getelementptr inbounds float, ptr [[S0]], i64 8
457+
; CHECK-F16C-NEXT: [[D8:%.*]] = getelementptr inbounds half, ptr [[D0]], i64 8
458+
; CHECK-F16C-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[S0]], align 4
459+
; CHECK-F16C-NEXT: [[TMP2:%.*]] = fptrunc <8 x float> [[TMP1]] to <8 x half>
460+
; CHECK-F16C-NEXT: [[TMP3:%.*]] = load <8 x float>, ptr [[S8]], align 4
461+
; CHECK-F16C-NEXT: [[TMP4:%.*]] = fptrunc <8 x float> [[TMP3]] to <8 x half>
462+
; CHECK-F16C-NEXT: store <8 x half> [[TMP2]], ptr [[D0]], align 2
463+
; CHECK-F16C-NEXT: store <8 x half> [[TMP4]], ptr [[D8]], align 2
459464
; CHECK-F16C-NEXT: ret void
460465
;
461466
; CHECK-AVX512-LABEL: define void @fpround_v16xf32_v16xf16(

0 commit comments

Comments
 (0)