Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 17 additions & 8 deletions llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4005,25 +4005,34 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// have similar cost.
auto VT = TLI->getValueType(DL, Ty);
if (VT.isScalarInteger() && VT.getSizeInBits() <= 64) {
if (Op2Info.isPowerOf2()) {
if (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2()) {
// Neg can be folded into the asr instruction.
// FIXME: Is the throughput cost of asr + neg the same as just asr?
return ISD == ISD::SDIV ? (3 * AddCost + AsrCost)
: (3 * AsrCost + AddCost);
} else {
return MulCost + AsrCost + 2 * AddCost;
}
} else if (VT.isVector()) {
InstructionCost UsraCost = 2 * AsrCost;
if (Op2Info.isPowerOf2()) {
if (Op2Info.isPowerOf2() || Op2Info.isNegatedPowerOf2()) {
// Division with scalable types corresponds to native 'asrd'
// instruction when SVE is available.
// e.g. %1 = sdiv <vscale x 4 x i32> %a, splat (i32 8)

// One more for the negation in SDIV
InstructionCost cost =
(Op2Info.isNegatedPowerOf2() && ISD == ISD::SDIV) ? AsrCost : 0;
if (Ty->isScalableTy() && ST->hasSVE())
return 2 * AsrCost;
return UsraCost +
(ISD == ISD::SDIV
? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) *
AsrCost
: 2 * AddCost);
cost += 2 * AsrCost;
else {
cost +=
UsraCost +
(ISD == ISD::SDIV
? (LT.second.getScalarType() == MVT::i64 ? 1 : 2) * AsrCost
: 2 * AddCost);
}
return cost;
} else if (LT.second == MVT::v2i64) {
return VT.getVectorNumElements() *
getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind,
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/AArch64/div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -870,27 +870,27 @@ define void @sdiv_uniformconstnegpow2() {
; CHECK-LABEL: 'sdiv_uniformconstnegpow2'
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = sdiv i128 undef, -16
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = sdiv i64 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = sdiv <2 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = sdiv <4 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = sdiv <8 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = sdiv i32 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = sdiv <2 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = sdiv <4 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = sdiv <8 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = sdiv <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = sdiv i16 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = sdiv <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = sdiv <4 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = sdiv <8 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = sdiv <16 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = sdiv <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = sdiv i8 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = sdiv <2 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = sdiv <4 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = sdiv <8 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:5 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = sdiv <16 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = sdiv <32 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = sdiv <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%I128 = sdiv i128 undef, -16
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/AArch64/rem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -870,27 +870,27 @@ define void @srem_uniformconstnegpow2() {
; CHECK-LABEL: 'srem_uniformconstnegpow2'
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:4 Lat:4 SizeLat:4 for: %I128 = srem i128 undef, -16
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I64 = srem i64 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:32 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i64 = srem <2 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V4i64 = srem <4 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V8i64 = srem <8 x i64> undef, splat (i64 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I32 = srem i32 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i32 = srem <2 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i32 = srem <4 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V8i32 = srem <8 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V16i32 = srem <16 x i32> undef, splat (i32 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I16 = srem i16 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i16 = srem <2 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i16 = srem <4 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i16 = srem <8 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V16i16 = srem <16 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V32i16 = srem <32 x i16> undef, splat (i16 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %I8 = srem i8 undef, -16
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:6 CodeSize:4 Lat:4 SizeLat:4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:24 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V2i8 = srem <2 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V4i8 = srem <4 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V8i8 = srem <8 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of 4 for: %V16i8 = srem <16 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:4 SizeLat:4 for: %V32i8 = srem <32 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:16 CodeSize:4 Lat:4 SizeLat:4 for: %V64i8 = srem <64 x i8> undef, splat (i8 -16)
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%I128 = srem i128 undef, -16
Expand Down
Loading
Loading