Skip to content

Commit 5b627e4

Browse files
committed
[AArch64] Add BF16 fpext and fptrunc costs.
This expands the recently added fp16 fpext and fpround costs to bf16. Some of the costs are from the rough number of instructions needed, some are a little aspirational. https://godbolt.org/z/bGEEd1vsW
1 parent 82fecab commit 5b627e4

File tree

2 files changed

+78
-22
lines changed

2 files changed

+78
-22
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2763,6 +2763,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
27632763
return AdjustCost(
27642764
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
27652765

2766+
static const TypeConversionCostTblEntry BF16Tbl[] = {
2767+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
2768+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
2769+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
2770+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
2771+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
2772+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
2773+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
2774+
};
2775+
2776+
if (ST->hasBF16())
2777+
if (const auto *Entry = ConvertCostTableLookup(
2778+
BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
2779+
return AdjustCost(Entry->Cost);
2780+
27662781
static const TypeConversionCostTblEntry ConversionTbl[] = {
27672782
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
27682783
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2850,6 +2865,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28502865
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
28512866
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
28522867
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
2868+
// BF16 (uses shift)
2869+
{ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl
2870+
{ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt
2871+
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
2872+
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
2873+
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
2874+
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
2875+
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
28532876
// FP Ext and trunc
28542877
{ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt
28552878
{ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
@@ -2862,6 +2885,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28622885
{ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
28632886
{ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
28642887
{ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
2888+
// BF16 (more complex, with +bf16 is handled above)
2889+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns
2890+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above
2891+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
2892+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
2893+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
2894+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
2895+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
2896+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},
28652897

28662898
// LowerVectorINT_TO_FP:
28672899
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},

llvm/test/Analysis/CostModel/AArch64/cast.ll

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s
4+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+bf16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16,CHECK-BF16 %s
45

56
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
67

@@ -1237,28 +1238,51 @@ define void @fp16cast() {
12371238
}
12381239

12391240
define void @bf16cast() {
1240-
; CHECK-LABEL: 'bf16cast'
1241-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1242-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1243-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1244-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1245-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1246-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext bfloat undef to double
1247-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1248-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1249-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1250-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1251-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1252-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1253-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1254-
; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1255-
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1256-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1257-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1258-
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1259-
; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1260-
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1261-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1241+
; CHECK-NOFP16-LABEL: 'bf16cast'
1242+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1243+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1244+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1245+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1246+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1247+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1248+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1249+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1250+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1251+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1252+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1253+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1254+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1255+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1256+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1257+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1258+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1259+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1260+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1261+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1262+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1263+
;
1264+
; CHECK-BF16-LABEL: 'bf16cast'
1265+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1266+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1267+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1268+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1269+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1270+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1271+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1272+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1273+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1274+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1275+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1276+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1277+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1278+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1279+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1280+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1281+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1282+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1283+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1284+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1285+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12621286
;
12631287
%extf16f32 = fpext bfloat undef to float
12641288
%extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>

0 commit comments

Comments
 (0)