Skip to content

Commit 290a878

Browse files
committed
update
1 parent 1e54b21 commit 290a878

File tree

3 files changed

+73
-18
lines changed

3 files changed

+73
-18
lines changed

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,17 @@ InstructionCost PPCTTIImpl::getInterleavedMemoryOpCost(
905905
// Returns the cost of the intrinsic call described by ICA for the requested
// cost kind. Every intrinsic is forwarded unchanged to the generic BaseT
// implementation; the only PowerPC-specific logic is the disabled draft kept
// in the block comment inside the body.
InstructionCost
906906
PPCTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
907907
TTI::TargetCostKind CostKind) const {
908+
/* NOTE(review): disabled draft, not compiled. It would cost
   Intrinsic::vp_load / Intrinsic::vp_store through getMaskedMemoryOpCost()
   as Intrinsic::masked_load / Intrinsic::masked_store with Align(1).
   In the vp_store branch `Ty` is assigned but never used (the same
   expression is repeated on the next line). Either enable this with a test
   or drop it before merging.
909+
if (ICA.getID() == Intrinsic::vp_load) {
910+
MemIntrinsicCostAttributes MICA(Intrinsic::masked_load, ICA.getReturnType(), Align(1), 0);
911+
return getMaskedMemoryOpCost(MICA, CostKind);
912+
}
913+
if (ICA.getID() == Intrinsic::vp_store) {
914+
Type *Ty = ICA.getArgTypes()[0];
915+
MemIntrinsicCostAttributes MICA(Intrinsic::masked_store, ICA.getArgTypes()[0], Align(1), 0);
916+
return getMaskedMemoryOpCost(MICA, CostKind);
917+
}
918+
*/
908919
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
909920
}
910921

@@ -1087,26 +1098,28 @@ bool PPCTTIImpl::hasActiveVectorLength() const {
10871098
unsigned CPU = ST->getCPUDirective();
10881099
if (!PPCEVL)
10891100
return false;
1090-
if (CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
1091-
(Pwr9EVL && CPU == PPC::DIR_PWR9))
1092-
return true;
1093-
return false;
1101+
return CPU == PPC::DIR_PWR10 || CPU == PPC::DIR_PWR_FUTURE ||
1102+
(Pwr9EVL && CPU == PPC::DIR_PWR9);
10941103
}
10951104

10961105
// Reports whether a masked load of DataType is legal on this subtarget.
// Returns false unless hasActiveVectorLength() holds; otherwise the answer
// depends only on the value type TLI computes for DataType.
// Alignment, AddressSpace and MaskKind are accepted for the interface but are
// not consulted anywhere in this body.
bool PPCTTIImpl::isLegalMaskedLoad(Type *DataType, Align Alignment,
1097-
unsigned AddressSpace) const {
1106+
unsigned AddressSpace,
1107+
TTI::MaskKind MaskKind) const {
10981108
// Without active-vector-length support no masked load is reported as legal.
if (!hasActiveVectorLength())
10991109
return false;
1110+
11001111
// Accepts exactly the value types i64, i32, i16 and i8; anything else is
// rejected.
auto IsLegalLoadWithLengthType = [](EVT VT) {
11011112
if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
11021113
return false;
11031114
return true;
11041115
};
1116+
11051117
// NOTE(review): the trailing `true` is presumably getValueType's
// "allow unknown types" flag; confirm against TargetLowering.
return IsLegalLoadWithLengthType(TLI->getValueType(DL, DataType, true));
11061118
}
11071119

11081120
// Reports whether a masked store of DataType is legal on this subtarget.
// A masked store is legal exactly when the masked load with the same
// DataType, Alignment, AddressSpace and MaskKind is legal, so the query is
// delegated to isLegalMaskedLoad().
bool PPCTTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment,
1109-
unsigned AddressSpace) const {
1121+
unsigned AddressSpace,
1122+
TTI::MaskKind MaskKind) const {
11101123
// Forward MaskKind explicitly: omitting it would make the load check run
// with the default mask kind regardless of what the caller asked for, and
// the two predicates would silently diverge once the load check starts
// inspecting it.
return isLegalMaskedLoad(DataType, Alignment, AddressSpace, MaskKind);
11111124
}
11121125

@@ -1137,8 +1150,8 @@ PPCTTIImpl::getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,
11371150
// Cost is 1 (scalar compare) + 1 (scalar select) +
11381151
// 1 * vectorCostAdjustmentFactor (vector load with length)
11391152
// Maybe + 1 (scalar shift)
1140-
InstructionCost Cost = 1 + 1 +
1141-
vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
1153+
InstructionCost Cost =
1154+
1 + 1 + vectorCostAdjustmentFactor(Opcode, DataTy, nullptr);
11421155
if (ST->getCPUDirective() != PPC::DIR_PWR_FUTURE ||
11431156
VecTy->getScalarSizeInBits() != 8)
11441157
Cost += 1; // need shift for length

llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -155,10 +155,14 @@ class PPCTTIImpl final : public BasicTTIImplBase<PPCTTIImpl> {
155155

156156
bool hasActiveVectorLength() const override;
157157

158-
bool isLegalMaskedStore(Type *DataType, Align Alignment,
159-
unsigned AddressSpace) const override;
160-
bool isLegalMaskedLoad(Type *DataType, Align Alignment,
161-
unsigned AddressSpace) const override;
158+
bool
159+
isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace,
160+
TTI::MaskKind MaskKind =
161+
TTI::MaskKind::VariableOrConstantMask) const override;
162+
bool
163+
isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace,
164+
TTI::MaskKind MaskKind =
165+
TTI::MaskKind::VariableOrConstantMask) const override;
162166

163167
InstructionCost
164168
getMaskedMemoryOpCost(const MemIntrinsicCostAttributes &MICA,

llvm/test/Analysis/CostModel/PowerPC/ld-st-with-length.ll

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,52 @@ target triple = "powerpc64le-unknown-linux-gnu"
66

77
define void @bar(ptr %base, <2 x i8> %val) {
88
; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i8.p0
9-
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
10-
; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
119
; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i8.p0
10+
; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i16.p0
11+
; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i16.p0
12+
; P9: cost of 16 for {{.*}} @llvm.masked.load.v2i32.p0
13+
; P9: cost of 12 for {{.*}} @llvm.masked.store.v2i32.p0
14+
; P9: cost of 12 for {{.*}} @llvm.masked.load.v2i64.p0
15+
; P9: cost of 10 for {{.*}} @llvm.masked.store.v2i64.p0
16+
; P9: cost of 36 for {{.*}} @llvm.masked.load.v3i64.p0
17+
; P9: cost of 15 for {{.*}} @llvm.masked.store.v3i64.p0
18+
; P9: cost of 32 for {{.*}} @llvm.masked.load.v4i15.p0
19+
; P9: cost of 24 for {{.*}} @llvm.masked.store.v4i15.p0
20+
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i8.p0
1221
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i8.p0
22+
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i16.p0
23+
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i16.p0
24+
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i32.p0
25+
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i32.p0
26+
; P10: cost of 4 for {{.*}} @llvm.masked.load.v2i64.p0
27+
; P10: cost of 4 for {{.*}} @llvm.masked.store.v2i64.p0
28+
; P10: cost of 24 for {{.*}} @llvm.masked.load.v3i64.p0
29+
; P10: cost of 12 for {{.*}} @llvm.masked.store.v3i64.p0
30+
; P10: cost of 16 for {{.*}} @llvm.masked.load.v4i15.p0
31+
; P10: cost of 16 for {{.*}} @llvm.masked.store.v4i15.p0
32+
; FUTURE: cost of 3 for {{.*}} @llvm.masked.load.v2i8.p0
1333
; FUTURE: cost of 3 for {{.*}} @llvm.masked.store.v2i8.p0
14-
%x2 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val)
15-
16-
call void @llvm.masked.store.v2i8.p0(<2 x i8> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
17-
34+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i16.p0
35+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i16.p0
36+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i32.p0
37+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i32.p0
38+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.load.v2i64.p0
39+
; FUTURE: cost of 4 for {{.*}} @llvm.masked.store.v2i64.p0
40+
; FUTURE: cost of 24 for {{.*}} @llvm.masked.load.v3i64.p0
41+
; FUTURE: cost of 12 for {{.*}} @llvm.masked.store.v3i64.p0
42+
; FUTURE: cost of 16 for {{.*}} @llvm.masked.load.v4i15.p0
43+
; FUTURE: cost of 16 for {{.*}} @llvm.masked.store.v4i15.p0
44+
%x1 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i8> %val)
45+
call void @llvm.masked.store.v2i8.p0(<2 x i8> %x1, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
46+
%x2 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i16> <i16 0, i16 0>)
47+
call void @llvm.masked.store.v2i16.p0(<2 x i16> %x2, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
48+
%x3 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i32> <i32 0, i32 0>)
49+
call void @llvm.masked.store.v2i32.p0(<2 x i32> %x3, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
50+
%x4 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr %base, i32 1, <2 x i1> <i1 1, i1 1>, <2 x i64> <i64 0, i64 0>)
51+
call void @llvm.masked.store.v2i64.p0(<2 x i64> %x4, ptr %base, i32 1, <2 x i1> <i1 1, i1 1>)
52+
%x5 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>, <3 x i64> <i64 0, i64 0, i64 0>)
53+
call void @llvm.masked.store.v3i64.p0(<3 x i64> %x5, ptr %base, i32 1, <3 x i1> <i1 1, i1 1, i1 1>)
54+
%x6 = call <4 x i15> @llvm.masked.load.v4i15.p0(ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i15> <i15 0, i15 0, i15 0, i15 0>)
55+
call void @llvm.masked.store.v4i15.p0(<4 x i15> %x6, ptr %base, i32 1, <4 x i1> <i1 1, i1 1, i1 1, i1 1>)
1856
ret void
1957
}

0 commit comments

Comments
 (0)