Skip to content

Commit cc7b91b

Browse files
committed
[AArch64] Add BF16 fpext and fptrunc costs.
This expands the recently added fp16 fpext and fpround costs to bf16. Some of the costs are from the rough number of instructions needed, some are a little aspirational. https://godbolt.org/z/bGEEd1vsW
1 parent 2f18b5e commit cc7b91b

File tree

2 files changed

+78
-22
lines changed

2 files changed

+78
-22
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2742,6 +2742,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
27422742
return AdjustCost(
27432743
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
27442744

2745+
static const TypeConversionCostTblEntry BF16Tbl[] = {
2746+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
2747+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
2748+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
2749+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
2750+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
2751+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
2752+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
2753+
};
2754+
2755+
if (ST->hasBF16())
2756+
if (const auto *Entry = ConvertCostTableLookup(
2757+
BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
2758+
return AdjustCost(Entry->Cost);
2759+
27452760
static const TypeConversionCostTblEntry ConversionTbl[] = {
27462761
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
27472762
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2812,6 +2827,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28122827
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
28132828
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
28142829
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
2830+
// BF16 (uses shift)
2831+
{ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl
2832+
{ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt
2833+
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
2834+
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
2835+
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
2836+
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
2837+
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
28152838
// FP Ext and trunc
28162839
{ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt
28172840
{ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
@@ -2824,6 +2847,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28242847
{ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
28252848
{ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
28262849
{ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
2850+
// BF16 (more complex, with +bf16 is handled above)
2851+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns
2852+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above
2853+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
2854+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
2855+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
2856+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
2857+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
2858+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},
28272859

28282860
// LowerVectorINT_TO_FP:
28292861
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},

llvm/test/Analysis/CostModel/AArch64/cast.ll

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s
4+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+bf16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16,CHECK-BF16 %s
45

56
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
67

@@ -1237,28 +1238,51 @@ define void @fp16cast() {
12371238
}
12381239

12391240
define void @bf16cast() {
1240-
; CHECK-LABEL: 'bf16cast'
1241-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1242-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1243-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1244-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1245-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1246-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext bfloat undef to double
1247-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1248-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1249-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1250-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1251-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1252-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1253-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1254-
; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1255-
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1256-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1257-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1258-
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1259-
; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1260-
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1261-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1241+
; CHECK-NOFP16-LABEL: 'bf16cast'
1242+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1243+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1244+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1245+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1246+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1247+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1248+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1249+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1250+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1251+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1252+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1253+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1254+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1255+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1256+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1257+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1258+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1259+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1260+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1261+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1262+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1263+
;
1264+
; CHECK-BF16-LABEL: 'bf16cast'
1265+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1266+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1267+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1268+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1269+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1270+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1271+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1272+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1273+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1274+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1275+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1276+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1277+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1278+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1279+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1280+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1281+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1282+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1283+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1284+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1285+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12621286
;
12631287
%extf16f32 = fpext bfloat undef to float
12641288
%extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>

0 commit comments

Comments
 (0)