diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 68aec80f07e1d..1ee8116e1e481 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -4005,7 +4005,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( // have similar cost. auto VT = TLI->getValueType(DL, Ty); if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) { - if (Op2Info.isPowerOf2()) { + if (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2()) { + // Neg can be folded into the asr instruction. return ISD == ISD::SDIV ? (3 * AddCost + AsrCost) : (3 * AsrCost + AddCost); } else { @@ -4013,17 +4014,24 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( } } else if (VT.isVector()) { InstructionCost UsraCost = 2 * AsrCost; - if (Op2Info.isPowerOf2()) { + if (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2()) { // Division with scalable types corresponds to native 'asrd' // instruction when SVE is available. // e.g. %1 = sdiv %a, splat (i32 8) + + // One more for the negation in SDIV + InstructionCost Cost = + (Op2Info.isNegatedPowerOf2() && ISD == ISD::SDIV) ? AsrCost : 0; if (Ty->isScalableTy() && ST->hasSVE()) - return 2 * AsrCost; - return UsraCost + - (ISD == ISD::SDIV - ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * - AsrCost - : 2 * AddCost); + Cost += 2 * AsrCost; + else { + Cost += + UsraCost + + (ISD == ISD::SDIV + ? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost + : 2 * AddCost); + } + return Cost; } else if (LT.second == MVT::v2i64) { return VT.getVectorNumElements() * getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind, diff --git a/llvm/test/Analysis/CostModel/AArch64/div.ll b/llvm/test/Analysis/CostModel/AArch64/div.ll index 5367344ce573f..3a2358dba51b2 100644 --- a/llvm/test/Analysis/CostModel/AArch64/div.ll +++ b/llvm/test/Analysis/CostModel/AArch64/div.ll @@ -870,27 +870,27 @@ define void @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, -16 ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I128 = sdiv i128 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AArch64/rem.ll b/llvm/test/Analysis/CostModel/AArch64/rem.ll index d684e3af00b83..2fa62f1705911 100644 --- a/llvm/test/Analysis/CostModel/AArch64/rem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/rem.ll @@ -870,27 +870,27 @@ define void @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' ; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, -16 ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = srem i64 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = srem i32 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = srem i16 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16) ; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = srem i8 undef, -16 -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %I128 = srem i128 undef, -16 diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll index 480c3146a210d..c055d3218f65b 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-div.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-div.ll @@ -479,42 +479,42 @@ define void @udiv_uniformconstpow2() { define void @sdiv_uniformconstnegpow2() { ; CHECK-LABEL: 'sdiv_uniformconstnegpow2' -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = sdiv undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = sdiv undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = sdiv undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = sdiv undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = sdiv undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = sdiv undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = sdiv undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = sdiv undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = sdiv undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = sdiv undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = sdiv undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = sdiv undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = sdiv undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = sdiv undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = sdiv undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = sdiv undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = sdiv undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = sdiv undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:3 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = sdiv undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = sdiv undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V2i64 = sdiv <2 x i64> undef, splat (i64 -16) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll index e2488735de4b5..eac8b66bcd216 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-rem.ll @@ -491,43 +491,43 @@ define void @urem_uniformconstpow2() { define void @srem_uniformconstnegpow2() { ; CHECK-LABEL: 'srem_uniformconstnegpow2' -; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found costs of RThru:Invalid CodeSize:4 Lat:4 SizeLat:4 for: %NV2i128 = srem undef, splat (i128 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = srem undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i64 = srem undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = srem undef, splat (i64 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = srem undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = srem undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i32 = srem undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = srem undef, splat (i32 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = srem undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = srem undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = srem undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i16 = srem undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = srem undef, splat (i16 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = srem undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = srem undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = srem undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = srem undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i8 = srem undef, splat (i8 -16) -; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i64 = srem undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %NV4i64 = srem undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i64 = srem undef, splat (i64 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i32 = srem undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i32 = srem undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %NV8i32 = srem undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i32 = srem undef, splat (i32 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i16 = srem undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i16 = srem undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i16 = srem undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %NV16i16 = srem undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV32i16 = srem undef, splat (i16 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV2i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV4i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV8i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:2 CodeSize:4 Lat:4 SizeLat:4 for: %NV16i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of 4 for: %NV32i8 = srem undef, splat (i8 -16) +; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %NV64i8 = srem undef, splat (i8 -16) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; %V2i64 = srem <2 x i64> undef, splat (i64 -16)