diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 515764c915bf4..2b950beaca33d 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2763,6 +2763,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, return AdjustCost( BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); + static const TypeConversionCostTblEntry BF16Tbl[] = { + {ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt + {ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt + {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn + {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2 + {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn + {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtl2+bfcvtn + {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn + }; + + if (ST->hasBF16()) + if (const auto *Entry = ConvertCostTableLookup( + BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) + return AdjustCost(Entry->Cost); + static const TypeConversionCostTblEntry ConversionTbl[] = { {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn @@ -2850,6 +2865,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, {ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl {ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl {ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl + // BF16 (uses shift) + {ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl + {ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt + {ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll + {ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2 + {ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl + {ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2 + {ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2 // FP Ext and trunc {ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt {ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn @@ -2862,6 +2885,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, {ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn {ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn {ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn + // BF16 (more complex, with +bf16 is handled above) + {ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns + {ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above + {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8}, + {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8}, + {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15}, + {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9}, + {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10}, + {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19}, // LowerVectorINT_TO_FP: {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1}, diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll index c363b58d939e8..b82c2f110d8c8 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=aarch64 %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s ; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s +; RUN: opt -passes="print" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+bf16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16,CHECK-BF16 %s target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" @@ -1237,28 +1238,51 @@ define void @fp16cast() { } define void @bf16cast() { -; CHECK-LABEL: 'bf16cast' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext bfloat undef to double -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NOFP16-LABEL: 'bf16cast' +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncf16f32 = fptrunc float undef to bfloat +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncf16f64 = fptrunc double undef to bfloat +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat> +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-BF16-LABEL: 'bf16cast' +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat> +; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %extf16f32 = fpext bfloat undef to float %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>