diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a3bf8c53571f7..ec76a5b989db0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2939,10 +2939,8 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // Complex: to v2f32 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3}, - {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3}, - {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2}, // Complex: to v4f32 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4}, @@ -3199,6 +3197,20 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) return AdjustCost(Entry->Cost); + // INT_TO_FP of i64->f32 will scalarize, which is required to avoid + // double-rounding issues. 
+ if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) && + DstTy.getScalarType() == MVT::f32 && SrcTy.getScalarSizeInBits() > 32 && + isa<FixedVectorType>(Dst) && isa<FixedVectorType>(Src)) + return AdjustCost( + cast<FixedVectorType>(Dst)->getNumElements() * + getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(), + CCH, CostKind) + + BaseT::getScalarizationOverhead(cast<FixedVectorType>(Src), false, true, + CostKind) + + BaseT::getScalarizationOverhead(cast<FixedVectorType>(Dst), true, false, + CostKind)); + if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) && CCH == TTI::CastContextHint::Masked && ST->isSVEorStreamingSVEAvailable() && diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll index ddfb747d782ae..ad8646c2cba2a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -391,8 +391,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 
x i8> undef to <2 x double> @@ -411,8 +411,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double> @@ -431,8 +431,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEXT: Cost 
Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -451,8 +451,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double> diff --git 
a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll index 4e6a36059d815..3100f4f8a3f66 100644 --- a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll +++ b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll @@ -1,14 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s +; RUN: opt -mattr=-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-NONEON +; RUN: opt -mattr=+sve,-neon < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-WITHSVE target triple = "aarch64-unknown-linux-gnu" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @uitofp() { -; CHECK-LABEL: 'uitofp' -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NONEON-LABEL: 'uitofp' +; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> +; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-WITHSVE-LABEL: 'uitofp' +; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> +; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %conv = uitofp <16 x i64> undef to <16 x float> ret void diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll index 0b051169a1b36..71411e93a0524 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll 
@@ -696,8 +696,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> @@ -716,8 +716,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 
for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> @@ -736,8 +736,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -756,8 +756,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 
x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> @@ -1122,8 +1122,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found an 
estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> @@ -1335,8 +1335,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>