Skip to content

Commit 15df9dd

Browse files
committed
Move base cost calculation to getTypeBasedIntrinsicInstrCost
1 parent b0c35de commit 15df9dd

File tree

3 files changed

+46
-47
lines changed

3 files changed

+46
-47
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,35 +1935,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19351935

19361936
return Cost;
19371937
}
1938-
case Intrinsic::experimental_vector_match: {
1939-
auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
1940-
auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
1941-
unsigned SearchSize = NeedleTy->getNumElements();
1942-
1943-
// If we're not expanding the intrinsic then we assume this is cheap to
1944-
// implement.
1945-
EVT SearchVT = getTLI()->getValueType(DL, SearchTy);
1946-
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
1947-
return getTypeLegalizationCost(RetTy).first;
1948-
1949-
// Approximate the cost based on the expansion code in
1950-
// SelectionDAGBuilder.
1951-
InstructionCost Cost = 0;
1952-
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
1953-
CostKind, 1, nullptr, nullptr);
1954-
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
1955-
CostKind, 0, nullptr, nullptr);
1956-
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
1957-
CostKind, 0, nullptr);
1958-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
1959-
CmpInst::ICMP_EQ, CostKind);
1960-
Cost +=
1961-
thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1962-
Cost *= SearchSize;
1963-
Cost +=
1964-
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
1965-
return Cost;
1966-
}
19671938
}
19681939

19691940
// Assume that we need to scalarize this intrinsic.
@@ -2219,6 +2190,35 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
22192190
case Intrinsic::vector_reduce_fminimum:
22202191
return thisT()->getMinMaxReductionCost(getMinMaxReductionIntrinsicOp(IID),
22212192
VecOpTy, ICA.getFlags(), CostKind);
2193+
case Intrinsic::experimental_vector_match: {
2194+
auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
2195+
auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
2196+
unsigned SearchSize = NeedleTy->getNumElements();
2197+
2198+
// If we're not expanding the intrinsic then we assume this is cheap to
2199+
// implement.
2200+
EVT SearchVT = getTLI()->getValueType(DL, SearchTy);
2201+
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
2202+
return getTypeLegalizationCost(RetTy).first;
2203+
2204+
// Approximate the cost based on the expansion code in
2205+
// SelectionDAGBuilder.
2206+
InstructionCost Cost = 0;
2207+
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
2208+
CostKind, 1, nullptr, nullptr);
2209+
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
2210+
CostKind, 0, nullptr, nullptr);
2211+
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
2212+
CostKind, 0, nullptr);
2213+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
2214+
CmpInst::ICMP_EQ, CostKind);
2215+
Cost +=
2216+
thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
2217+
Cost *= SearchSize;
2218+
Cost +=
2219+
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
2220+
return Cost;
2221+
}
22222222
case Intrinsic::abs:
22232223
ISD = ISD::ABS;
22242224
break;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -906,20 +906,19 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
906906
break;
907907
}
908908
case Intrinsic::experimental_vector_match: {
909-
if (auto *NeedleTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[1])) {
910-
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
911-
unsigned SearchSize = NeedleTy->getNumElements();
912-
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
913-
// Base cost for MATCH instructions. At least on the Neoverse V2 and
914-
// Neoverse V3, these are cheap operations with the same latency as a
915-
// vector ADD. In most cases, however, we also need to do an extra DUP.
916-
// For fixed-length vectors we currently need an extra five--six
917-
// instructions besides the MATCH.
918-
InstructionCost Cost = 4;
919-
if (isa<FixedVectorType>(RetTy))
920-
Cost += 10;
921-
return Cost;
922-
}
909+
auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
910+
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
911+
unsigned SearchSize = NeedleTy->getNumElements();
912+
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
913+
// Base cost for MATCH instructions. At least on the Neoverse V2 and
914+
// Neoverse V3, these are cheap operations with the same latency as a
915+
// vector ADD. In most cases, however, we also need to do an extra DUP.
916+
// For fixed-length vectors we currently need an extra five--six
917+
// instructions besides the MATCH.
918+
InstructionCost Cost = 4;
919+
if (isa<FixedVectorType>(RetTy))
920+
Cost += 10;
921+
return Cost;
923922
}
924923
break;
925924
}

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,12 +1386,12 @@ define void @match() #3 {
13861386
; TYPE_BASED_ONLY-LABEL: 'match'
13871387
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
13881388
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1389-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1390-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1389+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1390+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
13911391
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
13921392
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1393-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1394-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1393+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1394+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
13951395
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
13961396
;
13971397

0 commit comments

Comments
 (0)