Skip to content

Commit b9bdec3

Browse files
authored
[TTI][Vectorize] Migrate masked/gather-scatter/strided/expand-compress costing (NFCI) (#165532)
In #160470, there is a discussion about the possibility of exploring a general approach for handling memory intrinsics. API changes: - Remove getMaskedMemoryOpCost, getGatherScatterOpCost, getExpandCompressMemoryOpCost, getStridedMemoryOpCost from Analysis/TargetTransformInfo. - Add getMemIntrinsicInstrCost. In BasicTTIImpl, map intrinsic IDs to the existing target implementations until the legacy TTI hooks are retired. - masked_load/store → getMaskedMemoryOpCost - masked_/vp_gather/scatter → getGatherScatterOpCost - masked_expandload/compressstore → getExpandCompressMemoryOpCost - experimental_vp_strided_{load,store} → getStridedMemoryOpCost TODO: add support for vp_load_ff. No functional change intended; costs continue to route to the same target-specific hooks.
1 parent f40c694 commit b9bdec3

File tree

7 files changed

+203
-154
lines changed

7 files changed

+203
-154
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 17 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,9 @@ class MemIntrinsicCostAttributes {
129129
/// load/store to transform to the intrinsic.
130130
const Instruction *I = nullptr;
131131

132+
/// Address in memory.
133+
const Value *Ptr = nullptr;
134+
132135
/// Vector type of the data to be loaded or stored.
133136
Type *DataTy = nullptr;
134137

@@ -146,6 +149,13 @@ class MemIntrinsicCostAttributes {
146149
Align Alignment;
147150

148151
public:
152+
LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
153+
const Value *Ptr, bool VariableMask,
154+
Align Alignment,
155+
const Instruction *I = nullptr)
156+
: I(I), Ptr(Ptr), DataTy(DataTy), IID(Id), VariableMask(VariableMask),
157+
Alignment(Alignment) {}
158+
149159
LLVM_ABI MemIntrinsicCostAttributes(Intrinsic::ID Id, Type *DataTy,
150160
Align Alignment, unsigned AddressSpace)
151161
: DataTy(DataTy), IID(Id), AddressSpace(AddressSpace),
@@ -159,6 +169,7 @@ class MemIntrinsicCostAttributes {
159169

160170
Intrinsic::ID getID() const { return IID; }
161171
const Instruction *getInst() const { return I; }
172+
const Value *getPointer() const { return Ptr; }
162173
Type *getDataType() const { return DataTy; }
163174
bool getVariableMask() const { return VariableMask; }
164175
unsigned getAddressSpace() const { return AddressSpace; }
@@ -1608,44 +1619,6 @@ class TargetTransformInfo {
16081619
OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
16091620
const Instruction *I = nullptr) const;
16101621

1611-
/// \return The cost of masked Load and Store instructions.
1612-
LLVM_ABI InstructionCost getMaskedMemoryOpCost(
1613-
const MemIntrinsicCostAttributes &MICA,
1614-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1615-
1616-
/// \return The cost of Gather or Scatter operation
1617-
/// \p Opcode - is a type of memory access Load or Store
1618-
/// \p DataTy - a vector type of the data to be loaded or stored
1619-
/// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1620-
/// \p VariableMask - true when the memory access is predicated with a mask
1621-
/// that is not a compile-time constant
1622-
/// \p Alignment - alignment of single element
1623-
/// \p I - the optional original context instruction, if one exists, e.g. the
1624-
/// load/store to transform or the call to the gather/scatter intrinsic
1625-
LLVM_ABI InstructionCost getGatherScatterOpCost(
1626-
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1627-
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1628-
const Instruction *I = nullptr) const;
1629-
1630-
/// \return The cost of Expand Load or Compress Store operation
1631-
LLVM_ABI InstructionCost getExpandCompressMemoryOpCost(
1632-
const MemIntrinsicCostAttributes &MICA,
1633-
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1634-
1635-
/// \return The cost of strided memory operations.
1636-
/// \p Opcode - is a type of memory access Load or Store
1637-
/// \p DataTy - a vector type of the data to be loaded or stored
1638-
/// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1639-
/// \p VariableMask - true when the memory access is predicated with a mask
1640-
/// that is not a compile-time constant
1641-
/// \p Alignment - alignment of single element
1642-
/// \p I - the optional original context instruction, if one exists, e.g. the
1643-
/// load/store to transform or the call to the gather/scatter intrinsic
1644-
LLVM_ABI InstructionCost getStridedMemoryOpCost(
1645-
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1646-
Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
1647-
const Instruction *I = nullptr) const;
1648-
16491622
/// \return The cost of the interleaved memory operation.
16501623
/// \p Opcode is the memory operation code
16511624
/// \p VecTy is the vector type of the interleaved access.
@@ -1724,6 +1697,12 @@ class TargetTransformInfo {
17241697
LLVM_ABI InstructionCost getIntrinsicInstrCost(
17251698
const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const;
17261699

1700+
/// \returns The cost of memory intrinsic instructions.
1701+
/// Used when IntrinsicInst is not materialized.
1702+
LLVM_ABI InstructionCost
1703+
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
1704+
TTI::TargetCostKind CostKind) const;
1705+
17271706
/// \returns The cost of Call instructions.
17281707
LLVM_ABI InstructionCost getCallInstrCost(
17291708
Function *F, Type *RetTy, ArrayRef<Type *> Tys,

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -929,6 +929,11 @@ class TargetTransformInfoImplBase {
929929
return 1;
930930
}
931931

932+
virtual InstructionCost
933+
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
934+
TTI::TargetCostKind CostKind) const {
935+
return 1;
936+
}
932937
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy,
933938
ArrayRef<Type *> Tys,
934939
TTI::TargetCostKind CostKind) const {

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 87 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1628,8 +1628,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16281628
if (UseMaskForCond || UseMaskForGaps) {
16291629
unsigned IID = Opcode == Instruction::Load ? Intrinsic::masked_load
16301630
: Intrinsic::masked_store;
1631-
Cost = thisT()->getMaskedMemoryOpCost(
1632-
{IID, VecTy, Alignment, AddressSpace}, CostKind);
1631+
Cost = thisT()->getMemIntrinsicInstrCost(
1632+
MemIntrinsicCostAttributes(IID, VecTy, Alignment, AddressSpace),
1633+
CostKind);
16331634
} else
16341635
Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
16351636
CostKind);
@@ -1829,9 +1830,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
18291830
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
18301831
Alignment = VPI->getPointerAlignment().valueOrOne();
18311832
bool VarMask = isa<Constant>(ICA.getArgs()[2]);
1832-
return thisT()->getGatherScatterOpCost(
1833-
Instruction::Store, ICA.getArgTypes()[0], ICA.getArgs()[1], VarMask,
1834-
Alignment, CostKind, nullptr);
1833+
return thisT()->getMemIntrinsicInstrCost(
1834+
MemIntrinsicCostAttributes(Intrinsic::vp_scatter,
1835+
ICA.getArgTypes()[0], ICA.getArgs()[1],
1836+
VarMask, Alignment, nullptr),
1837+
CostKind);
18351838
}
18361839
if (ICA.getID() == Intrinsic::vp_gather) {
18371840
if (ICA.isTypeBasedOnly()) {
@@ -1845,9 +1848,11 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
18451848
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
18461849
Alignment = VPI->getPointerAlignment().valueOrOne();
18471850
bool VarMask = isa<Constant>(ICA.getArgs()[1]);
1848-
return thisT()->getGatherScatterOpCost(
1849-
Instruction::Load, ICA.getReturnType(), ICA.getArgs()[0], VarMask,
1850-
Alignment, CostKind, nullptr);
1851+
return thisT()->getMemIntrinsicInstrCost(
1852+
MemIntrinsicCostAttributes(Intrinsic::vp_gather,
1853+
ICA.getReturnType(), ICA.getArgs()[0],
1854+
VarMask, Alignment, nullptr),
1855+
CostKind);
18511856
}
18521857

18531858
if (ICA.getID() == Intrinsic::vp_select ||
@@ -1952,30 +1957,34 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19521957
const Value *Mask = Args[2];
19531958
bool VarMask = !isa<Constant>(Mask);
19541959
Align Alignment = I->getParamAlign(1).valueOrOne();
1955-
return thisT()->getGatherScatterOpCost(Instruction::Store,
1956-
ICA.getArgTypes()[0], Args[1],
1957-
VarMask, Alignment, CostKind, I);
1960+
return thisT()->getMemIntrinsicInstrCost(
1961+
MemIntrinsicCostAttributes(Intrinsic::masked_scatter,
1962+
ICA.getArgTypes()[0], Args[1], VarMask,
1963+
Alignment, I),
1964+
CostKind);
19581965
}
19591966
case Intrinsic::masked_gather: {
19601967
const Value *Mask = Args[1];
19611968
bool VarMask = !isa<Constant>(Mask);
19621969
Align Alignment = I->getParamAlign(0).valueOrOne();
1963-
return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
1964-
VarMask, Alignment, CostKind, I);
1970+
return thisT()->getMemIntrinsicInstrCost(
1971+
MemIntrinsicCostAttributes(Intrinsic::masked_gather, RetTy, Args[0],
1972+
VarMask, Alignment, I),
1973+
CostKind);
19651974
}
19661975
case Intrinsic::masked_compressstore: {
19671976
const Value *Data = Args[0];
19681977
const Value *Mask = Args[2];
19691978
Align Alignment = I->getParamAlign(1).valueOrOne();
1970-
return thisT()->getExpandCompressMemoryOpCost(
1979+
return thisT()->getMemIntrinsicInstrCost(
19711980
MemIntrinsicCostAttributes(IID, Data->getType(), !isa<Constant>(Mask),
19721981
Alignment, I),
19731982
CostKind);
19741983
}
19751984
case Intrinsic::masked_expandload: {
19761985
const Value *Mask = Args[1];
19771986
Align Alignment = I->getParamAlign(0).valueOrOne();
1978-
return thisT()->getExpandCompressMemoryOpCost(
1987+
return thisT()->getMemIntrinsicInstrCost(
19791988
MemIntrinsicCostAttributes(IID, RetTy, !isa<Constant>(Mask),
19801989
Alignment, I),
19811990
CostKind);
@@ -1989,9 +1998,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19891998
Type *EltTy = cast<VectorType>(Data->getType())->getElementType();
19901999
Align Alignment =
19912000
I->getParamAlign(1).value_or(thisT()->DL.getABITypeAlign(EltTy));
1992-
return thisT()->getStridedMemoryOpCost(Instruction::Store,
1993-
Data->getType(), Ptr, VarMask,
1994-
Alignment, CostKind, I);
2001+
return thisT()->getMemIntrinsicInstrCost(
2002+
MemIntrinsicCostAttributes(IID, Data->getType(), Ptr, VarMask,
2003+
Alignment, I),
2004+
CostKind);
19952005
}
19962006
case Intrinsic::experimental_vp_strided_load: {
19972007
const Value *Ptr = Args[0];
@@ -2001,8 +2011,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
20012011
Type *EltTy = cast<VectorType>(RetTy)->getElementType();
20022012
Align Alignment =
20032013
I->getParamAlign(0).value_or(thisT()->DL.getABITypeAlign(EltTy));
2004-
return thisT()->getStridedMemoryOpCost(Instruction::Load, RetTy, Ptr,
2005-
VarMask, Alignment, CostKind, I);
2014+
return thisT()->getMemIntrinsicInstrCost(
2015+
MemIntrinsicCostAttributes(IID, RetTy, Ptr, VarMask, Alignment, I),
2016+
CostKind);
20062017
}
20072018
case Intrinsic::stepvector: {
20082019
if (isa<ScalableVectorType>(RetTy))
@@ -2415,26 +2426,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
24152426
case Intrinsic::masked_store: {
24162427
Type *Ty = Tys[0];
24172428
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2418-
return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind);
2429+
return thisT()->getMemIntrinsicInstrCost(
2430+
MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);
24192431
}
24202432
case Intrinsic::masked_load: {
24212433
Type *Ty = RetTy;
24222434
Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
2423-
return thisT()->getMaskedMemoryOpCost({IID, Ty, TyAlign, 0}, CostKind);
2435+
return thisT()->getMemIntrinsicInstrCost(
2436+
MemIntrinsicCostAttributes(IID, Ty, TyAlign, 0), CostKind);
24242437
}
24252438
case Intrinsic::experimental_vp_strided_store: {
24262439
auto *Ty = cast<VectorType>(ICA.getArgTypes()[0]);
24272440
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
2428-
return thisT()->getStridedMemoryOpCost(
2429-
Instruction::Store, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
2430-
Alignment, CostKind, ICA.getInst());
2441+
return thisT()->getMemIntrinsicInstrCost(
2442+
MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,
2443+
/*VariableMask=*/true, Alignment,
2444+
ICA.getInst()),
2445+
CostKind);
24312446
}
24322447
case Intrinsic::experimental_vp_strided_load: {
24332448
auto *Ty = cast<VectorType>(ICA.getReturnType());
24342449
Align Alignment = thisT()->DL.getABITypeAlign(Ty->getElementType());
2435-
return thisT()->getStridedMemoryOpCost(
2436-
Instruction::Load, Ty, /*Ptr=*/nullptr, /*VariableMask=*/true,
2437-
Alignment, CostKind, ICA.getInst());
2450+
return thisT()->getMemIntrinsicInstrCost(
2451+
MemIntrinsicCostAttributes(IID, Ty, /*Ptr=*/nullptr,
2452+
/*VariableMask=*/true, Alignment,
2453+
ICA.getInst()),
2454+
CostKind);
24382455
}
24392456
case Intrinsic::vector_reduce_add:
24402457
case Intrinsic::vector_reduce_mul:
@@ -3022,6 +3039,48 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
30223039
return SingleCallCost;
30233040
}
30243041

3042+
/// Get memory intrinsic cost based on arguments.
3043+
InstructionCost
3044+
getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA,
3045+
TTI::TargetCostKind CostKind) const override {
3046+
unsigned Id = MICA.getID();
3047+
Type *DataTy = MICA.getDataType();
3048+
const Value *Ptr = MICA.getPointer();
3049+
const Instruction *I = MICA.getInst();
3050+
bool VariableMask = MICA.getVariableMask();
3051+
Align Alignment = MICA.getAlignment();
3052+
3053+
switch (Id) {
3054+
case Intrinsic::experimental_vp_strided_load:
3055+
case Intrinsic::experimental_vp_strided_store: {
3056+
unsigned Opcode = Id == Intrinsic::experimental_vp_strided_load
3057+
? Instruction::Load
3058+
: Instruction::Store;
3059+
return thisT()->getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask,
3060+
Alignment, CostKind, I);
3061+
}
3062+
case Intrinsic::masked_scatter:
3063+
case Intrinsic::masked_gather:
3064+
case Intrinsic::vp_scatter:
3065+
case Intrinsic::vp_gather: {
3066+
unsigned Opcode =
3067+
(Id == Intrinsic::masked_gather || Id == Intrinsic::vp_gather)
3068+
? Instruction::Load
3069+
: Instruction::Store;
3070+
return thisT()->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
3071+
Alignment, CostKind, I);
3072+
}
3073+
case Intrinsic::masked_load:
3074+
case Intrinsic::masked_store:
3075+
return thisT()->getMaskedMemoryOpCost(MICA, CostKind);
3076+
case Intrinsic::masked_compressstore:
3077+
case Intrinsic::masked_expandload:
3078+
return thisT()->getExpandCompressMemoryOpCost(MICA, CostKind);
3079+
default:
3080+
llvm_unreachable("unexpected intrinsic");
3081+
}
3082+
}
3083+
30253084
/// Compute a cost of the given call instruction.
30263085
///
30273086
/// Compute the cost of calling function F with return type RetTy and

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 8 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,41 +1190,6 @@ InstructionCost TargetTransformInfo::getMemoryOpCost(
11901190
return Cost;
11911191
}
11921192

1193-
InstructionCost TargetTransformInfo::getMaskedMemoryOpCost(
1194-
const MemIntrinsicCostAttributes &MICA,
1195-
TTI::TargetCostKind CostKind) const {
1196-
InstructionCost Cost = TTIImpl->getMaskedMemoryOpCost(MICA, CostKind);
1197-
assert(Cost >= 0 && "TTI should not produce negative costs!");
1198-
return Cost;
1199-
}
1200-
1201-
InstructionCost TargetTransformInfo::getGatherScatterOpCost(
1202-
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1203-
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
1204-
InstructionCost Cost = TTIImpl->getGatherScatterOpCost(
1205-
Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
1206-
assert((!Cost.isValid() || Cost >= 0) &&
1207-
"TTI should not produce negative costs!");
1208-
return Cost;
1209-
}
1210-
1211-
InstructionCost TargetTransformInfo::getExpandCompressMemoryOpCost(
1212-
const MemIntrinsicCostAttributes &MICA,
1213-
TTI::TargetCostKind CostKind) const {
1214-
InstructionCost Cost = TTIImpl->getExpandCompressMemoryOpCost(MICA, CostKind);
1215-
assert(Cost >= 0 && "TTI should not produce negative costs!");
1216-
return Cost;
1217-
}
1218-
1219-
InstructionCost TargetTransformInfo::getStridedMemoryOpCost(
1220-
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1221-
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) const {
1222-
InstructionCost Cost = TTIImpl->getStridedMemoryOpCost(
1223-
Opcode, DataTy, Ptr, VariableMask, Alignment, CostKind, I);
1224-
assert(Cost >= 0 && "TTI should not produce negative costs!");
1225-
return Cost;
1226-
}
1227-
12281193
InstructionCost TargetTransformInfo::getInterleavedMemoryOpCost(
12291194
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
12301195
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -1244,6 +1209,14 @@ TargetTransformInfo::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
12441209
return Cost;
12451210
}
12461211

1212+
InstructionCost TargetTransformInfo::getMemIntrinsicInstrCost(
1213+
const MemIntrinsicCostAttributes &MICA,
1214+
TTI::TargetCostKind CostKind) const {
1215+
InstructionCost Cost = TTIImpl->getMemIntrinsicInstrCost(MICA, CostKind);
1216+
assert(Cost >= 0 && "TTI should not produce negative costs!");
1217+
return Cost;
1218+
}
1219+
12471220
InstructionCost
12481221
TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
12491222
ArrayRef<Type *> Tys,

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5254,7 +5254,8 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
52545254
unsigned IID = I->getOpcode() == Instruction::Load
52555255
? Intrinsic::masked_load
52565256
: Intrinsic::masked_store;
5257-
Cost += TTI.getMaskedMemoryOpCost({IID, VectorTy, Alignment, AS}, CostKind);
5257+
Cost += TTI.getMemIntrinsicInstrCost(
5258+
MemIntrinsicCostAttributes(IID, VectorTy, Alignment, AS), CostKind);
52585259
} else {
52595260
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
52605261
Cost += TTI.getMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
@@ -5313,10 +5314,14 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
53135314
if (!Legal->isUniform(Ptr, VF))
53145315
PtrTy = toVectorTy(PtrTy, VF);
53155316

5317+
unsigned IID = I->getOpcode() == Instruction::Load
5318+
? Intrinsic::masked_gather
5319+
: Intrinsic::masked_scatter;
53165320
return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
5317-
TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
5318-
Legal->isMaskRequired(I), Alignment,
5319-
CostKind, I);
5321+
TTI.getMemIntrinsicInstrCost(
5322+
MemIntrinsicCostAttributes(IID, VectorTy, Ptr,
5323+
Legal->isMaskRequired(I), Alignment, I),
5324+
CostKind);
53205325
}
53215326

53225327
InstructionCost

0 commit comments

Comments
 (0)