Skip to content

Commit b0c35de

Browse files
committed
Implement cost calculation for generic lowering
1 parent 955435e commit b0c35de

File tree

4 files changed

+56
-28
lines changed

4 files changed

+56
-28
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -781,9 +781,7 @@ class TargetTransformInfoImplBase {
781781
default:
782782
break;
783783
case Intrinsic::experimental_vector_histogram_add:
784-
case Intrinsic::experimental_vector_match:
785-
// For now, we want explicit support from the target for histograms and
786-
// matches.
784+
// For now, we want explicit support from the target for histograms.
787785
return InstructionCost::getInvalid();
788786
case Intrinsic::allow_runtime_check:
789787
case Intrinsic::allow_ubsan_check:

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1935,6 +1935,35 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19351935

19361936
return Cost;
19371937
}
1938+
case Intrinsic::experimental_vector_match: {
1939+
auto *SearchTy = cast<VectorType>(ICA.getArgTypes()[0]);
1940+
auto *NeedleTy = cast<FixedVectorType>(ICA.getArgTypes()[1]);
1941+
unsigned SearchSize = NeedleTy->getNumElements();
1942+
1943+
// If we're not expanding the intrinsic then we assume this is cheap to
1944+
// implement.
1945+
EVT SearchVT = getTLI()->getValueType(DL, SearchTy);
1946+
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
1947+
return getTypeLegalizationCost(RetTy).first;
1948+
1949+
// Approximate the cost based on the expansion code in
1950+
// SelectionDAGBuilder.
1951+
InstructionCost Cost = 0;
1952+
Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, NeedleTy,
1953+
CostKind, 1, nullptr, nullptr);
1954+
Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SearchTy,
1955+
CostKind, 0, nullptr, nullptr);
1956+
Cost += thisT()->getShuffleCost(TTI::SK_Broadcast, SearchTy, std::nullopt,
1957+
CostKind, 0, nullptr);
1958+
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SearchTy, RetTy,
1959+
CmpInst::ICMP_EQ, CostKind);
1960+
Cost +=
1961+
thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1962+
Cost *= SearchSize;
1963+
Cost +=
1964+
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
1965+
return Cost;
1966+
}
19381967
}
19391968

19401969
// Assume that we need to scalarize this intrinsic.

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -906,21 +906,22 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
906906
break;
907907
}
908908
case Intrinsic::experimental_vector_match: {
909-
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
910-
unsigned SearchSize =
911-
cast<FixedVectorType>(ICA.getArgTypes()[1])->getNumElements();
912-
// If we can't lower to MATCH, return an invalid cost.
913-
if (getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
914-
return InstructionCost::getInvalid();
915-
// Base cost for MATCH instructions. At least on the Neoverse V2 and
916-
// Neoverse V3 these are cheap operations with the same latency as a vector
917-
// ADD. In most cases, however, we also need to do an extra DUP.
918-
InstructionCost Cost = 4;
919-
// For fixed-length vectors we currently need an extra five--six
920-
// instructions besides the MATCH.
921-
if (isa<FixedVectorType>(RetTy))
922-
Cost += 10;
923-
return Cost;
909+
if (auto *NeedleTy = dyn_cast<FixedVectorType>(ICA.getArgTypes()[1])) {
910+
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
911+
unsigned SearchSize = NeedleTy->getNumElements();
912+
if (!getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize)) {
913+
// Base cost for MATCH instructions. At least on the Neoverse V2 and
914+
// Neoverse V3, these are cheap operations with the same latency as a
915+
// vector ADD. In most cases, however, we also need to do an extra DUP.
916+
// For fixed-length vectors we currently need an extra five--six
917+
// instructions besides the MATCH.
918+
InstructionCost Cost = 4;
919+
if (isa<FixedVectorType>(RetTy))
920+
Cost += 10;
921+
return Cost;
922+
}
923+
}
924+
break;
924925
}
925926
default:
926927
break;

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1364,23 +1364,23 @@ define void @match() #3 {
13641364
; CHECK-VSCALE-1-LABEL: 'match'
13651365
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
13661366
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1367-
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1368-
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1367+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1368+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
13691369
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
13701370
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1371-
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1372-
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1371+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1372+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
13731373
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
13741374
;
13751375
; CHECK-VSCALE-2-LABEL: 'match'
13761376
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
13771377
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1378-
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1379-
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1378+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1379+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
13801380
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
13811381
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1382-
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1383-
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1382+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1383+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
13841384
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
13851385
;
13861386
; TYPE_BASED_ONLY-LABEL: 'match'
@@ -1390,8 +1390,8 @@ define void @match() #3 {
13901390
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
13911391
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
13921392
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1393-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1394-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1393+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1394+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
13951395
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
13961396
;
13971397

0 commit comments

Comments
 (0)