Skip to content

Commit d3da1a2

Browse files
committed
[TTI][CostModel] Add cost modeling for expandload and compressstore intrinsics
1 parent b270525 commit d3da1a2

File tree

13 files changed

+1723
-1444
lines changed

13 files changed

+1723
-1444
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1492,6 +1492,19 @@ class TargetTransformInfo {
14921492
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
14931493
const Instruction *I = nullptr) const;
14941494

1495+
/// \return The cost of Expand Load or Compress Store operation
1496+
/// \p Opcode - is a type of memory access Load or Store
1497+
/// \p DataTy - a vector type of the data to be loaded or stored
1498+
/// \p VariableMask - true when the memory access is predicated with a mask
1499+
/// that is not a compile-time constant
1500+
/// \p Alignment - alignment of single element
1501+
/// \p I - the optional original context instruction, if one exists, e.g. the
1502+
/// call to the expandload/compressstore intrinsic
1503+
InstructionCost getConsecutiveMemoryOpCost(
1504+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1505+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1506+
const Instruction *I = nullptr) const;
1507+
14951508
/// \return The cost of strided memory operations.
14961509
/// \p Opcode - is a type of memory access Load or Store
14971510
/// \p DataTy - a vector type of the data to be loaded or stored
@@ -2178,6 +2191,10 @@ class TargetTransformInfo::Concept {
21782191
TTI::TargetCostKind CostKind,
21792192
const Instruction *I = nullptr) = 0;
21802193
// Pure-virtual hook for the expandload/compressstore cost query; it takes the
// same arguments as the public TargetTransformInfo entry point.
virtual InstructionCost
2194+
getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
2195+
Align Alignment, TTI::TargetCostKind CostKind,
2196+
const Instruction *I = nullptr) = 0;
2197+
virtual InstructionCost
21812198
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
21822199
bool VariableMask, Align Alignment,
21832200
TTI::TargetCostKind CostKind,
@@ -2898,6 +2915,13 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
28982915
Alignment, CostKind, I);
28992916
}
29002917
// Forwards the expandload/compressstore cost query, with every argument
// unchanged, to the wrapped Impl object.
InstructionCost
2918+
getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
2919+
Align Alignment, TTI::TargetCostKind CostKind,
2920+
const Instruction *I = nullptr) override {
2921+
return Impl.getConsecutiveMemoryOpCost(Opcode, DataTy, VariableMask,
2922+
Alignment, CostKind, I);
2923+
}
2924+
InstructionCost
29012925
getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
29022926
bool VariableMask, Align Alignment,
29032927
TTI::TargetCostKind CostKind,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,13 @@ class TargetTransformInfoImplBase {
765765
return 1;
766766
}
767767

768+
// Base-class default for the expandload/compressstore cost: a constant 1,
// independent of every argument.
InstructionCost
769+
getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy, bool VariableMask,
770+
Align Alignment, TTI::TargetCostKind CostKind,
771+
const Instruction *I = nullptr) const {
772+
return 1;
773+
}
774+
768775
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
769776
const Value *Ptr, bool VariableMask,
770777
Align Alignment,

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,6 +1469,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
14691469
true, CostKind);
14701470
}
14711471

1472+
// Generic expandload/compressstore cost: delegates to
// getCommonMaskedMemoryOpCost. \p I is accepted but not used here.
// NOTE(review): the literal `true` is presumably the "is gather/scatter" flag
// of getCommonMaskedMemoryOpCost - confirm against its declaration.
InstructionCost getConsecutiveMemoryOpCost(unsigned Opcode, Type *DataTy,
1473+
bool VariableMask, Align Alignment,
1474+
TTI::TargetCostKind CostKind,
1475+
const Instruction *I = nullptr) {
1476+
return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
1477+
true, CostKind);
1478+
}
1479+
14721480
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
14731481
const Value *Ptr, bool VariableMask,
14741482
Align Alignment,
@@ -1777,6 +1785,21 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
17771785
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
17781786
VarMask, Alignment, CostKind, I);
17791787
}
1788+
case Intrinsic::masked_compressstore: {
1789+
const Value *Data = Args[0];
1790+
const Value *Mask = Args[2];
1791+
Align Alignment = I->getParamAlign(1).valueOrOne();
1792+
return thisT()->getConsecutiveMemoryOpCost(
1793+
Instruction::Store, Data->getType(), !isa<Constant>(Mask), Alignment,
1794+
CostKind, I);
1795+
}
1796+
case Intrinsic::masked_expandload: {
1797+
const Value *Mask = Args[1];
1798+
Align Alignment = I->getParamAlign(0).valueOrOne();
1799+
return thisT()->getConsecutiveMemoryOpCost(Instruction::Load, RetTy,
1800+
!isa<Constant>(Mask),
1801+
Alignment, CostKind, I);
1802+
}
17801803
case Intrinsic::experimental_vp_strided_store: {
17811804
const Value *Data = Args[0];
17821805
const Value *Ptr = Args[1];

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1135,6 +1135,15 @@ InstructionCost TargetTransformInfo::getGatherScatterOpCost(
11351135
return Cost;
11361136
}
11371137

1138+
// Public entry point for the expandload/compressstore cost: asks TTIImpl for
// the cost, asserts that it is not negative, and returns it.
InstructionCost TargetTransformInfo::getConsecutiveMemoryOpCost(
1139+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
1140+
TTI::TargetCostKind CostKind, const Instruction *I) const {
1141+
InstructionCost Cost = TTIImpl->getConsecutiveMemoryOpCost(
1142+
Opcode, DataTy, VariableMask, Alignment, CostKind, I);
1143+
assert(Cost >= 0 && "TTI should not produce negative costs!");
1144+
return Cost;
1145+
}
1146+
11381147
InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
11391148
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
11401149
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -838,6 +838,43 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
838838
return NumLoads * MemOpCost;
839839
}
840840

841+
// RISC-V cost of a masked compressstore (Opcode == Instruction::Store) or
// expandload (Opcode == Instruction::Load).
// Defers to BaseT unless the operation is legal for DataTy/Alignment and
// CostKind is TCK_RecipThroughput. Otherwise the result is the plain memory
// op cost plus LT.first times the cost of the helper instructions collected
// in Opcodes below.
InstructionCost RISCVTTIImpl::getConsecutiveMemoryOpCost(
842+
unsigned Opcode, Type *DataTy, bool VariableMask, Align Alignment,
843+
TTI::TargetCostKind CostKind, const Instruction *I) {
844+
bool IsLegal = (Opcode == Instruction::Store &&
845+
isLegalMaskedCompressStore(DataTy, Alignment)) ||
846+
(Opcode == Instruction::Load &&
847+
isLegalMaskedExpandLoad(DataTy, Alignment));
848+
if (!IsLegal || CostKind != TTI::TCK_RecipThroughput)
849+
return BaseT::getConsecutiveMemoryOpCost(Opcode, DataTy, VariableMask,
850+
Alignment, CostKind, I);
851+
// Example compressstore sequence:
852+
// vsetivli zero, 8, e32, m2, ta, ma (ignored)
853+
// vcompress.vm v10, v8, v0
854+
// vcpop.m a1, v0
855+
// vsetvli zero, a1, e32, m2, ta, ma
856+
// vse32.v v10, (a0)
857+
// Example expandload sequence:
858+
// vsetivli zero, 8, e8, mf2, ta, ma (ignored)
859+
// vcpop.m a1, v0
860+
// vsetvli zero, a1, e32, m2, ta, ma
861+
// vle32.v v10, (a0)
862+
// vsetivli zero, 8, e32, m2, ta, ma
863+
// viota.m v12, v0
864+
// vrgather.vv v8, v10, v12, v0.t
// Cost of the memory access itself (the vse/vle in the sequences above).
// NOTE(review): the literal 0 is presumably the address space - confirm.
865+
auto MemOpCost = getMemoryOpCost(Opcode, DataTy, Alignment, 0, CostKind);
866+
auto LT = getTypeLegalizationCost(DataTy);
// Helper instructions charged on top of the memory access: VSETVLI always,
// VCPOP_M only when the mask is not a constant, then VCOMPRESS_VM for a store
// or VSETIVLI + VIOTA_M + VRGATHER_VV for a load.
867+
SmallVector<unsigned, 4> Opcodes{RISCV::VSETVLI};
868+
if (VariableMask)
869+
Opcodes.push_back(RISCV::VCPOP_M);
870+
if (Opcode == Instruction::Store)
871+
Opcodes.append({RISCV::VCOMPRESS_VM});
872+
else
873+
Opcodes.append({RISCV::VSETIVLI, RISCV::VIOTA_M, RISCV::VRGATHER_VV});
874+
return MemOpCost +
875+
LT.first * getRISCVInstructionCost(Opcodes, LT.second, CostKind);
876+
}
877+
841878
InstructionCost RISCVTTIImpl::getStridedMemoryOpCost(
842879
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
843880
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,11 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
166166
TTI::TargetCostKind CostKind,
167167
const Instruction *I);
168168

169+
// RISC-V override of the expandload/compressstore cost query.
// NOTE(review): the type parameter is named Src here but DataTy in the
// out-of-line definition and in the other TTI layers; consider unifying.
InstructionCost getConsecutiveMemoryOpCost(unsigned Opcode, Type *Src,
170+
bool VariableMask, Align Alignment,
171+
TTI::TargetCostKind CostKind,
172+
const Instruction *I = nullptr);
173+
169174
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
170175
const Value *Ptr, bool VariableMask,
171176
Align Alignment,

llvm/test/Analysis/CostModel/RISCV/gep.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
268268
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
269269
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
270270
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
271-
; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
271+
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
272272
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
273273
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
274274
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
@@ -280,7 +280,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
280280
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> <i32 42, i32 43>
281281
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
282282
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
283-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
283+
; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
284284
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
285285
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
286286
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
@@ -338,7 +338,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
338338
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %3 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
339339
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %x3 = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %3, i32 1, <2 x i1> undef, <2 x i8> undef)
340340
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
341-
; RVI-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
341+
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
342342
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
343343
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
344344
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
@@ -350,7 +350,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
350350
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %9 = getelementptr i8, <2 x ptr> %base.vec, <2 x i32> zeroinitializer
351351
; RVI-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> undef, <2 x ptr> %9, i32 1, <2 x i1> undef)
352352
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
353-
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
353+
; RVI-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
354354
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
355355
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
356356
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0

0 commit comments

Comments
 (0)