Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -902,38 +902,39 @@ TargetTransformInfo::getOperandInfo(const Value *V) {

// Check for a splat of a constant or for a non uniform vector of constants
// and check if the constant(s) are all powers of two.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = OK_NonUniformConstantValue;
if (Splat) {
if (Splat) {
// Check for a splat of a uniform value. This is not loop aware, so return
// true only for the obviously uniform cases (argument, globalvalue)
if (isa<Argument>(Splat) || isa<GlobalValue>(Splat)) {
OpInfo = OK_UniformValue;
} else if (isa<Constant>(Splat)) {
OpInfo = OK_UniformConstantValue;
if (auto *CI = dyn_cast<ConstantInt>(Splat)) {
if (CI->getValue().isPowerOf2())
OpProps = OP_PowerOf2;
else if (CI->getValue().isNegatedPowerOf2())
OpProps = OP_NegatedPowerOf2;
}
} else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
bool AllPow2 = true, AllNegPow2 = true;
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
AllPow2 &= CI->getValue().isPowerOf2();
AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
if (AllPow2 || AllNegPow2)
continue;
}
AllPow2 = AllNegPow2 = false;
break;
}
} else if (const auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
OpInfo = OK_NonUniformConstantValue;
bool AllPow2 = true, AllNegPow2 = true;
for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I))) {
AllPow2 &= CI->getValue().isPowerOf2();
AllNegPow2 &= CI->getValue().isNegatedPowerOf2();
if (AllPow2 || AllNegPow2)
continue;
}
OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
AllPow2 = AllNegPow2 = false;
break;
}
OpProps = AllPow2 ? OP_PowerOf2 : OpProps;
OpProps = AllNegPow2 ? OP_NegatedPowerOf2 : OpProps;
} else if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
OpInfo = OK_NonUniformConstantValue;
}

// Check for a splat of a uniform value. This is not loop aware, so return
// true only for the obviously uniform cases (argument, globalvalue)
if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
OpInfo = OK_UniformValue;

return {OpInfo, OpProps};
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3532,7 +3532,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
case ISD::UDIV: {
auto VT = TLI->getValueType(DL, Ty);
if (Op2Info.isConstant() && Op2Info.isUniform()) {
if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT) &&
!VT.isScalableVector()) {
// Vector signed division by constant are expanded to the
// sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
// to MULHS + SUB + SRL + ADD + SRL.
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/AArch64/sve-div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -339,24 +339,24 @@ define void @sdiv_uniformconstpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V16i8 = sdiv <16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 198 for instruction: %V32i8 = sdiv <32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 396 for instruction: %V64i8 = sdiv <64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV2i64 = sdiv <vscale x 2 x i64> undef, splat (i64 16)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I take it that the issue that @hassnaaHamdi reported has gone away with the latest revision?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep for the moment, due to the opt-opt in llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp. They should change again in a future patch, but that should happen in a more reliable way where it applies to all vector types.

; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV4i64 = sdiv <vscale x 4 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV8i64 = sdiv <vscale x 8 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV2i32 = sdiv <vscale x 2 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV4i32 = sdiv <vscale x 4 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i32 = sdiv <vscale x 8 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i32 = sdiv <vscale x 16 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV2i16 = sdiv <vscale x 2 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV4i16 = sdiv <vscale x 4 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV8i16 = sdiv <vscale x 8 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV16i16 = sdiv <vscale x 16 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV32i16 = sdiv <vscale x 32 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV2i8 = sdiv <vscale x 2 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV4i8 = sdiv <vscale x 4 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV8i8 = sdiv <vscale x 8 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %NV16i8 = sdiv <vscale x 16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV32i8 = sdiv <vscale x 32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV64i8 = sdiv <vscale x 64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = sdiv <2 x i64> undef, splat (i64 16)
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/Analysis/CostModel/AArch64/sve-rem.ll
Original file line number Diff line number Diff line change
Expand Up @@ -339,24 +339,24 @@ define void @srem_uniformconstpow2() {
; CHECK-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V16i8 = srem <16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %V32i8 = srem <32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 404 for instruction: %V64i8 = srem <64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV2i64 = srem <vscale x 2 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NV4i64 = srem <vscale x 4 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %NV8i64 = srem <vscale x 8 x i64> undef, splat (i64 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV2i32 = srem <vscale x 2 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV4i32 = srem <vscale x 4 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NV8i32 = srem <vscale x 8 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %NV16i32 = srem <vscale x 16 x i32> undef, splat (i32 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV2i16 = srem <vscale x 2 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV4i16 = srem <vscale x 4 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV8i16 = srem <vscale x 8 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NV16i16 = srem <vscale x 16 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %NV32i16 = srem <vscale x 32 x i16> undef, splat (i16 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV2i8 = srem <vscale x 2 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV4i8 = srem <vscale x 4 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV8i8 = srem <vscale x 8 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %NV16i8 = srem <vscale x 16 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %NV32i8 = srem <vscale x 32 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %NV64i8 = srem <vscale x 64 x i8> undef, splat (i8 16)
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%V2i64 = srem <2 x i64> undef, splat (i64 16)
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/RISCV/rvv-load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,8 @@ define void @store_of_constant(ptr %p) {
; CHECK-LABEL: 'store_of_constant'
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
Expand All @@ -530,8 +530,8 @@ define void @store_of_constant(ptr %p) {
; CODESIZE-LABEL: 'store_of_constant'
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> poison, ptr %p, align 16
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> undef, ptr %p, align 16
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> zeroinitializer, ptr %p, align 16
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> zeroinitializer, ptr %p, align 32
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 1), ptr %p, align 16
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i64> splat (i64 1), ptr %p, align 32
; CODESIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store <4 x i32> splat (i32 4096), ptr %p, align 16
Expand Down