-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[PowerPC] cost modeling for length type VP intrinsic load/store #168938
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-backend-powerpc Author: None (RolandF77) ChangesOverride and fill in the target hooks for PPC that allow opt to cost using length style VP intrinsics for load/store. Full diff: https://github.com/llvm/llvm-project/pull/168938.diff 3 Files Affected:
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index fbed34277dbab..f17a7a87313c6 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -24,6 +24,10 @@ using namespace llvm;
#define DEBUG_TYPE "ppctti"
+static cl::opt<bool> PPCEVL("ppc-evl",
+ cl::desc("Allow EVL type vp.load/vp.store"),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> Pwr9EVL("ppc-pwr9-evl",
cl::desc("Allow vp.load and vp.store for pwr9"),
cl::init(false), cl::Hidden);
@@ -1078,3 +1082,62 @@ PPCTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
}
+
+bool PPCTTIImpl::hasActiveVectorLength() const {
+ unsigned CPU = ST->getCPUDirective();
+ if (!PPCEVL)
+ return false;
+ if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
+ (Pwr9EVL && CPU == PPC::DIR_PWR9))
+ return true;
+ return false;
+}
+
+static inline bool isLegalLoadWithLengthType(EVT VT) {
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8)
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ if (!hasActiveVectorLength())
+ return false;
+ if (!isLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true)))
+ return false;
+ return true;
+}
+
+bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const {
+ return isLegalMaskedLoad(DataType, Alignment, AddressSpace);
+}
+
+InstructionCost
+PPCTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
+ Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost BaseCost =
+ BaseT::getMaskedMemoryOpCost(Opcode, DataTy, Alignment, AddressSpace,
+ CostKind);
+
+ if (Opcode != Instruction::Load && Opcode != Instruction::Store)
+ return BaseCost;
+ auto VecTy = dyn_cast<FixedVectorType>(DataTy);
+ if (!VecTy)
+ return BaseCost;
+ if (!isLegalMaskedLoad(VecTy->getScalarType(), Alignment, AddressSpace))
+ return BaseCost;
+ if (VecTy->getPrimitiveSizeInBits() > 128)
+ return BaseCost;
+
+ // Is scalar compare + select + maybe shift + vector load
+ InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost = 2 + Adj;
+ if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
+ VecTy->getScalarSizeInBits() != 8)
+ Cost += 1; // need shift
+ return Cost;
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index f80ebdbce7f64..f7fd5da94b1cb 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -153,6 +153,18 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override;
+ bool hasActiveVectorLength() const override;
+
+ bool isLegalMaskedStore(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment,
+ unsigned AddressSpace) const override;
+
+ InstructionCost
+ getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) const override;
+
private:
// The following constant is used for estimating costs on power9.
static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
diff --git a/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
new file mode 100644
index 0000000000000..df5b5b68677a4
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -mcpu=pwr9 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P9
+; RUN: opt < %s -mcpu=pwr10 -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=P10
+; RUN: opt < %s -mcpu=future -ppc-evl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefix=FUTURE
+target datalayout = "e-m:e-Fn32-i64:64-i128:128-n32:64-S128-v256:256:256-v512:512:512"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @bar(ptr %base, <2 x ptr> %base.vec) {
+; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
+; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
+; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
+; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
+ %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> undef, <2 x i8> undef)
+
+ call void @llvm.masked.store.v2i8.p0(<2 x i8> undef, ptr %base, i32 1, <2 x i1> undef)
+
+ ret void
+}
|
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions h,cpp -- llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h --diff_from_common_commit
View the diff from clang-format here.diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 11ac63ebe..74b744e33 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1137,8 +1137,8 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
// Cost is 1 (scalar compare) + 1 (scalar select) +
// 1 * vectorCostAdjustmentFactor (vector load with length)
// Maybe + 1 (scalar shift)
- InstructionCost Cost = 1 + 1 +
- vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
+ InstructionCost Cost =
+ 1 + 1 + vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
VecTy->getScalarSizeInBits() != 8)
Cost += 1; // need shift for length
|
|
✅ With the latest revision this PR passed the undef deprecator. |
🐧 Linux x64 Test Results
All tests passed but another part of the build failed. Click on a failure below to see the details. lib/Target/PowerPC/CMakeFiles/LLVMPowerPCCodeGen.dir/PPCTargetTransformInfo.cpp.olib/Target/PowerPC/CMakeFiles/LLVMPowerPCCodeGen.dir/PPCTargetMachine.cpp.oIf these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the |
| return false; | ||
| } | ||
|
|
||
| static inline bool isLegalLoadWithLengthType(EVT VT) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
since the function is a simple static function and only used in the isLegalMaskedLoad , we can make the function as lamda function in the isLegalMaskedLoad
| return true; | ||
| } | ||
|
|
||
| bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I do not think the function isLegalMaskedStore used in the patch ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is exposed to external users. It is conceptually used in the patch through getMaskedMemoryOpCost but since it is currently the same as for load I didn't differentiate. I guess I can make this clearer by calling both from there.
| return true; | ||
| } | ||
|
|
||
| bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it looks the function isLegalMaskedLoad has same logic and argument as isLegalMaskedStore, can we change the function name to isLegalMaskedLoadStore ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These are target hooks - methods defined in TargetTransformInfo in Analysis to give info to opt passes about target characteristics, which we override to provide PPC specific answers. We can't change the names.
|
|
||
| // Is scalar compare + select + maybe shift + vector load | ||
| InstructionCost Adj = vectorCostAdjustmentFactor(Opcode, DataTy, nullptr); | ||
| InstructionCost Cost = 2 + Adj; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: change to
InstructionCost Cost = 2 + vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
and I am curiously what the 2 means for? is it possible to have comment for it ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is what the above comment is supposed to be for. I can try to make it more clear.
diggerlin
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8) | ||
| return false; | ||
| return true; | ||
| }; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: add a blank after the line for more readable.
| if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE || | ||
| (Pwr9EVL && CPU == PPC::DIR_PWR9)) | ||
| return true; | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could just be
| if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE || | |
| (Pwr9EVL && CPU == PPC::DIR_PWR9)) | |
| return true; | |
| return false; | |
| return CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE || | |
| (Pwr9EVL && CPU == PPC::DIR_PWR9); |
This means vector predication intrinsics are supported? This would need some tests with vp intrinsics, which are different to masked ones used in the tests.
| %x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val) | ||
|
|
||
| call void @llvm.masked.store.v2i8.p0(<2 x i8> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you should cover at least all supported types + some unsupported ones
Override and fill in the target hooks for PPC that allow opt to cost using length style VP intrinsics for load/store.