Skip to content

Commit 2db7b31

Browse files
authored
[AArch64] Add BF16 fpext and fptrunc costs. (#119524)
This expands the recently added fp16 fpext and fpround costs to bf16. Some of the costs are taken from the rough number of instructions needed, some are a little aspirational. https://godbolt.org/z/bGEEd1vsW
1 parent 0fa59c6 commit 2db7b31

File tree

2 files changed

+78
-22
lines changed

2 files changed

+78
-22
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2761,6 +2761,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
27612761
return AdjustCost(
27622762
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
27632763

2764+
static const TypeConversionCostTblEntry BF16Tbl[] = {
2765+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
2766+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
2767+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
2768+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
2769+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
2770+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn
2771+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
2772+
};
2773+
2774+
if (ST->hasBF16())
2775+
if (const auto *Entry = ConvertCostTableLookup(
2776+
BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
2777+
return AdjustCost(Entry->Cost);
2778+
27642779
static const TypeConversionCostTblEntry ConversionTbl[] = {
27652780
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
27662781
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
@@ -2848,6 +2863,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28482863
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
28492864
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
28502865
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
2866+
// BF16 (uses shift)
2867+
{ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl
2868+
{ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt
2869+
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
2870+
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
2871+
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
2872+
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
2873+
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
28512874
// FP Ext and trunc
28522875
{ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt
28532876
{ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
@@ -2860,6 +2883,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28602883
{ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
28612884
{ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
28622885
{ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
2886+
// BF16 (more complex, with +bf16 is handled above)
2887+
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns
2888+
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above
2889+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
2890+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
2891+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
2892+
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
2893+
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
2894+
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},
28632895

28642896
// LowerVectorINT_TO_FP:
28652897
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},

llvm/test/Analysis/CostModel/AArch64/cast.ll

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s
33
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s
4+
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+bf16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16,CHECK-BF16 %s
45

56
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
67

@@ -1237,28 +1238,51 @@ define void @fp16cast() {
12371238
}
12381239

12391240
define void @bf16cast() {
1240-
; CHECK-LABEL: 'bf16cast'
1241-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1242-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1243-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1244-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1245-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1246-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext bfloat undef to double
1247-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1248-
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1249-
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1250-
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1251-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1252-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1253-
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1254-
; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1255-
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1256-
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1257-
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1258-
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1259-
; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1260-
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1261-
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1241+
; CHECK-NOFP16-LABEL: 'bf16cast'
1242+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1243+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1244+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1245+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1246+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1247+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1248+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1249+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1250+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1251+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1252+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1253+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1254+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1255+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1256+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1257+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1258+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1259+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1260+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1261+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1262+
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1263+
;
1264+
; CHECK-BF16-LABEL: 'bf16cast'
1265+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
1266+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
1267+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
1268+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
1269+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
1270+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
1271+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
1272+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
1273+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
1274+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
1275+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
1276+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
1277+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
1278+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
1279+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
1280+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
1281+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
1282+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
1283+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
1284+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
1285+
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
12621286
;
12631287
%extf16f32 = fpext bfloat undef to float
12641288
%extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>

0 commit comments

Comments
 (0)