@@ -3093,8 +3093,9 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
30933093 BaseT::getCastInstrCost (Opcode, Dst, Src, CCH, CostKind, I));
30943094
30953095 // For the moment we do not have lowering for SVE1-only fptrunc f64->bf16 as
3096- // we use fcvtx undef SVE2. Give them invalid costs.
3097- if (!ST->hasSVE2 () && ISD == ISD::FP_ROUND && SrcTy.isScalableVector () &&
3096+ // we use fcvtx under SVE2. Give them invalid costs.
3097+ if (!ST->hasSVE2 () && !ST->isStreamingSVEAvailable () &&
3098+ ISD == ISD::FP_ROUND && SrcTy.isScalableVector () &&
30983099 DstTy.getScalarType () == MVT::bf16 && SrcTy.getScalarType () == MVT::f64 )
30993100 return InstructionCost::getInvalid ();
31003101
@@ -3106,11 +3107,11 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
31063107 {ISD::FP_ROUND, MVT::v2bf16, MVT::v2f64, 2 }, // bfcvtn+fcvtn
31073108 {ISD::FP_ROUND, MVT::v4bf16, MVT::v4f64, 3 }, // fcvtn+fcvtl2+bfcvtn
31083109 {ISD::FP_ROUND, MVT::v8bf16, MVT::v8f64, 6 }, // 2 * fcvtn+fcvtn2+bfcvtn
3109- {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1 }, // bfcvt
3110- {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1 }, // bfcvt
3111- {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3 }, // bfcvt+bfcvt+uzp1
3112- {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2 }, // fcvtx+bfcvt
3113- {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5 }, // fcvtx+bfcvt+ bfcvt+uzp1
3110+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f32, 1 }, // bfcvt
3111+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f32, 1 }, // bfcvt
3112+ {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f32, 3 }, // bfcvt+bfcvt+uzp1
3113+ {ISD::FP_ROUND, MVT::nxv2bf16, MVT::nxv2f64, 2 }, // fcvtx+bfcvt
3114+ {ISD::FP_ROUND, MVT::nxv4bf16, MVT::nxv4f64, 5 }, // 2* fcvtx+2* bfcvt+uzp1
31143115 {ISD::FP_ROUND, MVT::nxv8bf16, MVT::nxv8f64, 11 }, // 4*fcvt+4*bfcvt+3*uzp
31153116 };
31163117
0 commit comments