Skip to content

Commit fc515fb

Browse files
committed
[CostModel] Add type-based cost model for get.active.lane.mask intrinsic
I recently realised that we return an invalid cost when requesting the type-based cost for the get.active.lane.mask intrinsic. I've fixed that in this patch by reusing the existing code for the non-type-based model.
1 parent f5d2996 commit fc515fb

File tree

2 files changed

+40
-31
lines changed

2 files changed

+40
-31
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 28 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1691,6 +1691,29 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16911691
return Cost;
16921692
}
16931693

1694+
/// Compute the cost of the get.active.lane.mask intrinsic from its types.
///
/// Both the get_active_lane_mask cases visible in this class forward here,
/// passing the intrinsic's return type and the type of its first argument.
///
/// \param RetTy    Return type of the intrinsic. It is cast to VectorType on
///                 the expansion path, so it must be a vector type there.
/// \param ArgTy    Type of the intrinsic's first argument; used as the
///                 element type of the expanded vector.
/// \param CostKind Cost kind forwarded to the nested cost queries.
/// \returns The first component of the type-legalization cost of \p RetTy
///          when the target does not expand the intrinsic; otherwise the
///          cost of a uadd_sat on the expanded vector type plus the cost of
///          an ICMP_ULT compare on it.
InstructionCost getActiveLaneMaskCost(Type *RetTy, Type *ArgTy,
1695+
TTI::TargetCostKind CostKind) {
1696+
// Translate the IR types into the value types that the target-lowering
// hook below is queried with.
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
1697+
EVT ArgVT = getTLI()->getValueType(DL, ArgTy, true);
1698+
1699+
// If we're not expanding the intrinsic then we assume this is cheap
1700+
// to implement.
1701+
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgVT))
1702+
return getTypeLegalizationCost(RetTy).first;
1703+
1704+
// Create the expanded types that will be used to calculate the uadd_sat
1705+
// operation.
// The expanded type has ArgTy elements and the same element count as RetTy.
1706+
Type *ExpRetTy =
1707+
VectorType::get(ArgTy, cast<VectorType>(RetTy)->getElementCount());
1708+
// No argument types are supplied and the fast-math flags are
// default-constructed, so only ExpRetTy describes the uadd_sat query.
IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {},
1709+
FastMathFlags());
1710+
InstructionCost Cost =
1711+
thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
1712+
// Add the unsigned less-than compare over the expanded vector; RetTy is
// passed as the condition type of that compare.
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
1713+
CmpInst::ICMP_ULT, CostKind);
1714+
return Cost;
1715+
}
1716+
16941717
/// Get intrinsic cost based on arguments.
16951718
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
16961719
TTI::TargetCostKind CostKind) {
@@ -1987,25 +2010,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
19872010
return Cost;
19882011
}
19892012
case Intrinsic::get_active_lane_mask: {
1990-
EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
1991-
EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
1992-
1993-
// If we're not expanding the intrinsic then we assume this is cheap
1994-
// to implement.
1995-
if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
1996-
return getTypeLegalizationCost(RetTy).first;
1997-
}
1998-
1999-
// Create the expanded types that will be used to calculate the uadd_sat
2000-
// operation.
2001-
Type *ExpRetTy = VectorType::get(
2002-
ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
2003-
IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
2004-
InstructionCost Cost =
2005-
thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
2006-
Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
2007-
CmpInst::ICMP_ULT, CostKind);
2008-
return Cost;
2013+
return thisT()->getActiveLaneMaskCost(RetTy, ICA.getArgTypes()[0],
2014+
CostKind);
20092015
}
20102016
case Intrinsic::experimental_cttz_elts: {
20112017
EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
@@ -2394,6 +2400,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
23942400
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
23952401
return Cost;
23962402
}
2403+
case Intrinsic::get_active_lane_mask:
2404+
return thisT()->getActiveLaneMaskCost(RetTy, ICA.getArgTypes()[0],
2405+
CostKind);
23972406
case Intrinsic::abs:
23982407
ISD = ISD::ABS;
23992408
break;

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -958,16 +958,16 @@ define void @get_lane_mask() #0 {
958958
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
959959
;
960960
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
961-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
962-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
963-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
964-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
965-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
966-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
967-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
968-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
969-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
970-
; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
961+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
962+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
963+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
964+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
965+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
966+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
967+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
968+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
969+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
970+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
971971
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
972972
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
973973
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
@@ -976,8 +976,8 @@ define void @get_lane_mask() #0 {
976976
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
977977
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
978978
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
979-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
979+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
980+
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
981981
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
982982
;
983983
%mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)

0 commit comments

Comments
 (0)