From 0a7eced42782b12bb728c0579f0fdf688165ce95 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 21 Mar 2025 09:27:45 +0000 Subject: [PATCH] [AArch64] Update costs for scalarizing i64->f32 int_to_fp. After #130665 these operations will be scalarized to avoid double-rounding. This updates the cost model to match. In the future we might be able to use SVE instructions to help, but for the moment the costs shoule be higher. Costsize and Latency costs are not yet expected to be accurate. --- .../AArch64/AArch64TargetTransformInfo.cpp | 16 +++++++++++-- llvm/test/Analysis/CostModel/AArch64/cast.ll | 16 ++++++------- .../CostModel/AArch64/no-sve-no-neon.ll | 14 +++++++---- .../Analysis/CostModel/AArch64/sve-cast.ll | 24 +++++++++---------- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index a3bf8c53571f7..ec76a5b989db0 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -2939,10 +2939,8 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // Complex: to v2f32 {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3}, - {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3}, {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3}, - {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2}, // Complex: to v4f32 {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4}, @@ -3199,6 +3197,20 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) return AdjustCost(Entry->Cost); + // INT_TO_FP of i64->f32 will scalarize, which is required to avoid + // double-rounding issues. + if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) && + DstTy.getScalarType() == MVT::f32 && SrcTy.getScalarSizeInBits() > 32 && + isa(Dst) && isa(Src)) + return AdjustCost( + cast(Dst)->getNumElements() * + getCastInstrCost(Opcode, Dst->getScalarType(), Src->getScalarType(), + CCH, CostKind) + + BaseT::getScalarizationOverhead(cast(Src), false, true, + CostKind) + + BaseT::getScalarizationOverhead(cast(Dst), true, false, + CostKind)); + if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) && CCH == TTI::CastContextHint::Masked && ST->isSVEorStreamingSVEAvailable() && diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll index ddfb747d782ae..ad8646c2cba2a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -391,8 +391,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r179 = sitofp <2 x i64> undef to <2 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r180 = uitofp <2 x i1> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r181 = sitofp <2 x i1> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r182 = uitofp <2 x i8> undef to <2 x double> @@ -411,8 +411,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of 1 for: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:1 Lat:1 SizeLat:1 for: %r199 = sitofp <4 x i64> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r200 = uitofp <4 x i1> undef to <4 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:1 Lat:1 SizeLat:1 for: %r201 = sitofp <4 x i1> undef to <4 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:9 CodeSize:1 Lat:1 SizeLat:1 for: %r202 = uitofp <4 x i8> undef to <4 x double> @@ -431,8 +431,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:1 SizeLat:1 for: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:36 CodeSize:1 Lat:1 SizeLat:1 for: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:7 CodeSize:1 Lat:1 SizeLat:1 for: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:19 CodeSize:1 Lat:1 SizeLat:1 for: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -451,8 +451,8 @@ define i32 @casts_no_users() { ; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:1 Lat:1 SizeLat:1 for: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found costs of RThru:72 CodeSize:1 Lat:1 SizeLat:1 for: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:15 CodeSize:1 Lat:1 SizeLat:1 for: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-NEXT: Cost Model: Found costs of RThru:39 CodeSize:1 Lat:1 SizeLat:1 for: %r242 = uitofp <16 x i8> undef to <16 x double> diff --git a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll index 4e6a36059d815..3100f4f8a3f66 100644 --- a/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll +++ b/llvm/test/Analysis/CostModel/AArch64/no-sve-no-neon.ll @@ -1,14 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2 -; RUN: opt -mattr=-neon < %s -passes="print" 2>&1 -disable-output | FileCheck %s -; RUN: opt -mattr=+sve,-neon < %s -passes="print" 2>&1 -disable-output | FileCheck %s +; RUN: opt -mattr=-neon < %s -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-NONEON +; RUN: opt -mattr=+sve,-neon < %s -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=CHECK-WITHSVE target triple = "aarch64-unknown-linux-gnu" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" define void @uitofp() { -; CHECK-LABEL: 'uitofp' -; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; CHECK-NONEON-LABEL: 'uitofp' +; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> +; CHECK-NONEON-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +; CHECK-WITHSVE-LABEL: 'uitofp' +; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %conv = uitofp <16 x i64> undef to <16 x float> +; CHECK-WITHSVE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %conv = uitofp <16 x i64> undef to <16 x float> ret void diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll index 0b051169a1b36..71411e93a0524 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-cast.ll @@ -696,8 +696,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> @@ -716,8 +716,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r195 = sitofp <4 x i16> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r196 = uitofp <4 x i32> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r197 = sitofp <4 x i32> undef to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r198 = uitofp <4 x i64> undef to <4 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %r199 = sitofp <4 x i64> undef to <4 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r200 = uitofp <4 x i1> undef to <4 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r201 = sitofp <4 x i1> undef to <4 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %r202 = uitofp <4 x i8> undef to <4 x double> @@ -736,8 +736,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r215 = sitofp <8 x i16> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r216 = uitofp <8 x i32> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r217 = sitofp <8 x i32> undef to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r218 = uitofp <8 x i64> undef to <8 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %r219 = sitofp <8 x i64> undef to <8 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r220 = uitofp <8 x i1> undef to <8 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %r221 = sitofp <8 x i1> undef to <8 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %r222 = uitofp <8 x i8> undef to <8 x double> @@ -756,8 +756,8 @@ define i32 @casts_no_users() { ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r235 = sitofp <16 x i16> undef to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r236 = uitofp <16 x i32> undef to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r237 = sitofp <16 x i32> undef to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> -; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r238 = uitofp <16 x i64> undef to <16 x float> +; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %r239 = sitofp <16 x i64> undef to <16 x float> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r240 = uitofp <16 x i1> undef to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %r241 = sitofp <16 x i1> undef to <16 x double> ; CHECK-SVE-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %r242 = uitofp <16 x i8> undef to <16 x double> @@ -1122,8 +1122,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-256-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double> @@ -1335,8 +1335,8 @@ define i32 @casts_no_users() { ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %r175 = sitofp <2 x i16> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r176 = uitofp <2 x i32> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r177 = sitofp <2 x i32> undef to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> -; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r178 = uitofp <2 x i64> undef to <2 x float> +; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %r179 = sitofp <2 x i64> undef to <2 x float> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r180 = uitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %r181 = sitofp <2 x i1> undef to <2 x double> ; FIXED-MIN-2048-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %r182 = uitofp <2 x i8> undef to <2 x double>