Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2763,6 +2763,21 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));

// Costs for rounding f32/f64 (scalar and fixed-width vector) to bf16 using the
// bfcvt/bfcvtn conversion instructions. This table is only consulted when the
// subtarget reports hasBF16(); the f64 vector cases first narrow to f32 with
// fcvtn/fcvtn2 and then convert to bf16.
static const TypeConversionCostTblEntry BF16Tbl[] = {
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 1}, // bfcvt
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 1}, // bfcvt
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 1}, // bfcvtn
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 2}, // bfcvtn+bfcvtn2
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2}, // bfcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3}, // fcvtn+fcvtn2+bfcvtn
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+bfcvtn
};

// With +bf16, prefer the dedicated bf16 rounding costs from BF16Tbl. When the
// feature is absent, or BF16Tbl has no entry for this (ISD, Dst, Src) triple,
// nothing is returned here and control continues to the tables below.
if (ST->hasBF16())
if (const auto *Entry = ConvertCostTableLookup(
BF16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);

static const TypeConversionCostTblEntry ConversionTbl[] = {
{ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1}, // xtn
{ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1}, // xtn
Expand Down Expand Up @@ -2850,6 +2865,14 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2f16, 2}, // fcvtl+fcvtl
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4f16, 3}, // fcvtl+fcvtl2+fcvtl
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8f16, 6}, // 2 * fcvtl+fcvtl2+fcvtl
// BF16 (uses shift)
{ISD::FP_EXTEND, MVT::f32, MVT::bf16, 1}, // shl
{ISD::FP_EXTEND, MVT::f64, MVT::bf16, 2}, // shl+fcvt
{ISD::FP_EXTEND, MVT::v4f32, MVT::v4bf16, 1}, // shll
{ISD::FP_EXTEND, MVT::v8f32, MVT::v8bf16, 2}, // shll+shll2
{ISD::FP_EXTEND, MVT::v2f64, MVT::v2bf16, 2}, // shll+fcvtl
{ISD::FP_EXTEND, MVT::v4f64, MVT::v4bf16, 3}, // shll+fcvtl+fcvtl2
{ISD::FP_EXTEND, MVT::v8f64, MVT::v8bf16, 6}, // 2 * shll+fcvtl+fcvtl2
// FP Ext and trunc
{ISD::FP_ROUND, MVT::f32, MVT::f64, 1}, // fcvt
{ISD::FP_ROUND, MVT::v2f32, MVT::v2f64, 1}, // fcvtn
Expand All @@ -2862,6 +2885,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ISD::FP_ROUND, MVT::v2f16, MVT::v2f64, 2}, // fcvtn+fcvtn
{ISD::FP_ROUND, MVT::v4f16, MVT::v4f64, 3}, // fcvtn+fcvtn2+fcvtn
{ISD::FP_ROUND, MVT::v8f16, MVT::v8f64, 6}, // 2 * fcvtn+fcvtn2+fcvtn
// BF16 (more complex, with +bf16 is handled above)
{ISD::FP_ROUND, MVT::bf16, MVT::f32, 8}, // Expansion is ~8 insns
{ISD::FP_ROUND, MVT::bf16, MVT::f64, 9}, // fcvtn + above
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f32, 8},
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f32, 8},
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f32, 15},
{ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 9},
{ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 10},
{ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 19},

// LowerVectorINT_TO_FP:
{ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
Expand Down
68 changes: 46 additions & 22 deletions llvm/test/Analysis/CostModel/AArch64/cast.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16,+bf16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16,CHECK-BF16 %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

Expand Down Expand Up @@ -1237,28 +1238,51 @@ define void @fp16cast() {
}

define void @bf16cast() {
; CHECK-LABEL: 'bf16cast'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f64 = fpext bfloat undef to double
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
; CHECK-NOFP16-LABEL: 'bf16cast'
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncf16f32 = fptrunc float undef to bfloat
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncf16f64 = fptrunc double undef to bfloat
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; CHECK-BF16-LABEL: 'bf16cast'
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extf16f32 = fpext bfloat undef to float
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %extv4f16f32 = fpext <4 x bfloat> undef to <4 x float>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv8f16f32 = fpext <8 x bfloat> undef to <8 x float>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %extv16f16f32 = fpext <16 x bfloat> undef to <16 x float>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extf16f64 = fpext bfloat undef to double
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %extv2f16f64 = fpext <2 x bfloat> undef to <2 x double>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %extv4f16f64 = fpext <4 x bfloat> undef to <4 x double>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %extv8f16f64 = fpext <8 x bfloat> undef to <8 x double>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %extv16f16f64 = fpext <16 x bfloat> undef to <16 x double>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f32 = fptrunc float undef to bfloat
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %truncv2f16f32 = fptrunc <2 x float> undef to <2 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncv4f16f32 = fptrunc <4 x float> undef to <4 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv8f16f32 = fptrunc <8 x float> undef to <8 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %truncv16f16f32 = fptrunc <16 x float> undef to <16 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %truncf16f64 = fptrunc double undef to bfloat
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %truncv2f16f64 = fptrunc <2 x double> undef to <2 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %truncv4f16f64 = fptrunc <4 x double> undef to <4 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %truncv8f16f64 = fptrunc <8 x double> undef to <8 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %truncv16f16f64 = fptrunc <16 x double> undef to <16 x bfloat>
; CHECK-BF16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%extf16f32 = fpext bfloat undef to float
%extv2f16f32 = fpext <2 x bfloat> undef to <2 x float>
Expand Down
Loading