Skip to content

Commit 955435e

Browse files
committed
[AArch64] Add cost model for @experimental.vector.match
Currently, vector types that would default to generic lowering (i.e. that would not utilise a MATCH instruction) return an invalid cost. Fixed-length search vectors have a higher cost than scalable vectors because we need a few more instructions to convert the boolean mask.
1 parent b2df007 commit 955435e

File tree

3 files changed

+68
-1
lines changed

3 files changed

+68
-1
lines changed

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,9 @@ class TargetTransformInfoImplBase {
781781
default:
782782
break;
783783
case Intrinsic::experimental_vector_histogram_add:
784-
// For now, we want explicit support from the target for histograms.
784+
case Intrinsic::experimental_vector_match:
785+
// For now, we want explicit support from the target for histograms and
786+
// matches.
785787
return InstructionCost::getInvalid();
786788
case Intrinsic::allow_runtime_check:
787789
case Intrinsic::allow_ubsan_check:

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,23 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
905905
}
906906
break;
907907
}
908+
case Intrinsic::experimental_vector_match: {
909+
EVT SearchVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
910+
unsigned SearchSize =
911+
cast<FixedVectorType>(ICA.getArgTypes()[1])->getNumElements();
912+
// If we can't lower to MATCH, return an invalid cost.
913+
if (getTLI()->shouldExpandVectorMatch(SearchVT, SearchSize))
914+
return InstructionCost::getInvalid();
915+
// Base cost for MATCH instructions. At least on the Neoverse V2 and
916+
// Neoverse V3 these are cheap operations with the same latency as a vector
917+
// ADD. In most cases, however, we also need to do an extra DUP.
918+
InstructionCost Cost = 4;
919+
// For fixed-length vectors we currently need an extra five to six
920+
// instructions besides the MATCH.
921+
if (isa<FixedVectorType>(RetTy))
922+
Cost += 10;
923+
return Cost;
924+
}
908925
default:
909926
break;
910927
}

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,6 +1360,54 @@ define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m
13601360
ret void
13611361
}
13621362

1363+
define void @match() #3 {
1364+
; CHECK-VSCALE-1-LABEL: 'match'
1365+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
1366+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1367+
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1368+
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1369+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
1370+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1371+
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1372+
; CHECK-VSCALE-1-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1373+
; CHECK-VSCALE-1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1374+
;
1375+
; CHECK-VSCALE-2-LABEL: 'match'
1376+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
1377+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1378+
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1379+
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1380+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
1381+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1382+
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1383+
; CHECK-VSCALE-2-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1384+
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1385+
;
1386+
; TYPE_BASED_ONLY-LABEL: 'match'
1387+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
1388+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1389+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1390+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1391+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
1392+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1393+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1394+
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1395+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
1396+
;
1397+
1398+
%match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef)
1399+
%match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef)
1400+
%match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef)
1401+
%match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef)
1402+
1403+
%match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef)
1404+
%match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef)
1405+
%match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef)
1406+
%match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef)
1407+
1408+
ret void
1409+
}
1410+
13631411
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64, i64)
13641412
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64, i64)
13651413
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64, i64)

0 commit comments

Comments
 (0)