@@ -2811,6 +2811,17 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
28112811 BF16Tbl, ISD, DstTy.getSimpleVT (), SrcTy.getSimpleVT ()))
28122812 return AdjustCost (Entry->Cost );
28132813
2814+ // Symbolic constants for the SVE sitofp/uitofp entries in the table below.
2815+ // The cost of unpacking twice is artificially increased for now in order
2816+ // to avoid regressions against NEON, which will use tbl instructions directly
2817+ // instead of multiple layers of [s|u]unpk[lo|hi].
2818+ // We use the unpacks in cases where the destination type is illegal and
2819+ // requires splitting of the input, even if the input type itself is legal.
2820+ const unsigned int SVE_EXT_COST = 1 ;
2821+ const unsigned int SVE_FCVT_COST = 1 ;
2822+ const unsigned int SVE_UNPACK_ONCE = 4 ;
2823+ const unsigned int SVE_UNPACK_TWICE = 16 ;
2824+
28142825 static const TypeConversionCostTblEntry ConversionTbl[] = {
28152826 {ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 1 }, // xtn
28162827 {ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, // xtn
@@ -2936,6 +2947,42 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29362947 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
29372948 {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
29382949
2950+ // SVE: to nxv2f16
2951+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2952+ SVE_EXT_COST + SVE_FCVT_COST},
2953+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2954+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2955+ {ISD::SINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2956+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i8,
2957+ SVE_EXT_COST + SVE_FCVT_COST},
2958+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i16, SVE_FCVT_COST},
2959+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i32, SVE_FCVT_COST},
2960+ {ISD::UINT_TO_FP, MVT::nxv2f16, MVT::nxv2i64, SVE_FCVT_COST},
2961+
2962+ // SVE: to nxv4f16
2963+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2964+ SVE_EXT_COST + SVE_FCVT_COST},
2965+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2966+ {ISD::SINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2967+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i8,
2968+ SVE_EXT_COST + SVE_FCVT_COST},
2969+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i16, SVE_FCVT_COST},
2970+ {ISD::UINT_TO_FP, MVT::nxv4f16, MVT::nxv4i32, SVE_FCVT_COST},
2971+
2972+ // SVE: to nxv8f16
2973+ {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2974+ SVE_EXT_COST + SVE_FCVT_COST},
2975+ {ISD::SINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2976+ {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i8,
2977+ SVE_EXT_COST + SVE_FCVT_COST},
2978+ {ISD::UINT_TO_FP, MVT::nxv8f16, MVT::nxv8i16, SVE_FCVT_COST},
2979+
2980+ // SVE: to nxv16f16
2981+ {ISD::SINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2982+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2983+ {ISD::UINT_TO_FP, MVT::nxv16f16, MVT::nxv16i8,
2984+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
2985+
29392986 // Complex: to v2f32
29402987 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
29412988 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
@@ -2944,18 +2991,56 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29442991 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
29452992 {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
29462993
2994+ // SVE: to nxv2f32
2995+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
2996+ SVE_EXT_COST + SVE_FCVT_COST},
2997+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
2998+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
2999+ {ISD::SINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3000+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i8,
3001+ SVE_EXT_COST + SVE_FCVT_COST},
3002+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i16, SVE_FCVT_COST},
3003+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i32, SVE_FCVT_COST},
3004+ {ISD::UINT_TO_FP, MVT::nxv2f32, MVT::nxv2i64, SVE_FCVT_COST},
3005+
29473006 // Complex: to v4f32
29483007 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
29493008 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
29503009 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
29513010 {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
29523011
3012+ // SVE: to nxv4f32 (signed and unsigned i8/i16/i32 sources)
3013+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3014+ SVE_EXT_COST + SVE_FCVT_COST},
3015+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3016+ {ISD::SINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3017+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i8,
3018+ SVE_EXT_COST + SVE_FCVT_COST},
3019+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i16, SVE_FCVT_COST},
3020+ {ISD::UINT_TO_FP, MVT::nxv4f32, MVT::nxv4i32, SVE_FCVT_COST},
3021+
29533022 // Complex: to v8f32
29543023 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
29553024 {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
29563025 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
29573026 {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
29583027
3028+ // SVE: to nxv8f32
3029+ {ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3030+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3031+ {ISD::SINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3032+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3033+ {ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i8,
3034+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3035+ {ISD::UINT_TO_FP, MVT::nxv8f32, MVT::nxv8i16,
3036+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3037+
3038+ // SVE: to nxv16f32
3039+ {ISD::SINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3040+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3041+ {ISD::UINT_TO_FP, MVT::nxv16f32, MVT::nxv16i8,
3042+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3043+
29593044 // Complex: to v16f32
29603045 {ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
29613046 {ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
@@ -2968,10 +3053,46 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
29683053 {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
29693054 {ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
29703055
3056+ // SVE: to nxv2f64
3057+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3058+ SVE_EXT_COST + SVE_FCVT_COST},
3059+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3060+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3061+ {ISD::SINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3062+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i8,
3063+ SVE_EXT_COST + SVE_FCVT_COST},
3064+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i16, SVE_FCVT_COST},
3065+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i32, SVE_FCVT_COST},
3066+ {ISD::UINT_TO_FP, MVT::nxv2f64, MVT::nxv2i64, SVE_FCVT_COST},
3067+
29713068 // Complex: to v4f64
29723069 {ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 4 },
29733070 {ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 4 },
29743071
3072+ // SVE: to nxv4f64
3073+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3074+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3075+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3076+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3077+ {ISD::SINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3078+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3079+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i8,
3080+ SVE_EXT_COST + SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3081+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i16,
3082+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3083+ {ISD::UINT_TO_FP, MVT::nxv4f64, MVT::nxv4i32,
3084+ SVE_UNPACK_ONCE + 2 * SVE_FCVT_COST},
3085+
3086+ // SVE: to nxv8f64
3087+ {ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3088+ SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3089+ {ISD::SINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3090+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3091+ {ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i8,
3092+ SVE_EXT_COST + SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3093+ {ISD::UINT_TO_FP, MVT::nxv8f64, MVT::nxv8i16,
3094+ SVE_UNPACK_TWICE + 4 * SVE_FCVT_COST},
3095+
29753096 // LowerVectorFP_TO_INT
29763097 {ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
29773098 {ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
0 commit comments