-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[CostModel] Add type-based cost model for get.active.lane.mask intrinsic #130132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
I recently realised that we return an invalid cost when requesting the type-based cost for the get.active.lane.mask intrinsic. I've fixed that in this patch by reusing the existing code for the non-type-based model.
|
@llvm/pr-subscribers-llvm-analysis Author: David Sherwood (david-arm) ChangesI recently realised that we return an invalid cost when requesting Full diff: https://github.com/llvm/llvm-project/pull/130132.diff 2 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 563953516a354..a663c350a8655 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -1691,6 +1691,29 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
+ InstructionCost getActiveLaneMaskCost(Type *RetTy, Type *ArgTy,
+ TTI::TargetCostKind CostKind) {
+ EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
+ EVT ArgVT = getTLI()->getValueType(DL, ArgTy, true);
+
+ // If we're not expanding the intrinsic then we assume this is cheap
+ // to implement.
+ if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgVT))
+ return getTypeLegalizationCost(RetTy).first;
+
+ // Create the expanded types that will be used to calculate the uadd_sat
+ // operation.
+ Type *ExpRetTy =
+ VectorType::get(ArgTy, cast<VectorType>(RetTy)->getElementCount());
+ IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {},
+ FastMathFlags());
+ InstructionCost Cost =
+ thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
+ Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
+ CmpInst::ICMP_ULT, CostKind);
+ return Cost;
+ }
+
/// Get intrinsic cost based on arguments.
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) {
@@ -1987,25 +2010,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return Cost;
}
case Intrinsic::get_active_lane_mask: {
- EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
- EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
-
- // If we're not expanding the intrinsic then we assume this is cheap
- // to implement.
- if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
- return getTypeLegalizationCost(RetTy).first;
- }
-
- // Create the expanded types that will be used to calculate the uadd_sat
- // operation.
- Type *ExpRetTy = VectorType::get(
- ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
- IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
- InstructionCost Cost =
- thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
- Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
- CmpInst::ICMP_ULT, CostKind);
- return Cost;
+ return thisT()->getActiveLaneMaskCost(RetTy, ICA.getArgTypes()[0],
+ CostKind);
}
case Intrinsic::experimental_cttz_elts: {
EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
@@ -2394,6 +2400,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind);
return Cost;
}
+ case Intrinsic::get_active_lane_mask:
+ return thisT()->getActiveLaneMaskCost(RetTy, ICA.getArgTypes()[0],
+ CostKind);
case Intrinsic::abs:
ISD = ISD::ABS;
break;
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 0bf776b5c97e3..265ff89b36050 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -958,16 +958,16 @@ define void @get_lane_mask() #0 {
; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef)
@@ -976,8 +976,8 @@ define void @get_lane_mask() #0 {
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef)
; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)
|
| return thisT()->getActiveLaneMaskCost(RetTy, ICA.getArgTypes()[0], | ||
| CostKind); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this be
return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind);
so that getActiveLaneMaskCost could be moved inline to getTypeBasedIntrinsicInstrCost? That's what we did for experimental_vector_match.
If we did this, we could probably also group the case blocks of the two intrinsics and save an extra LOC. What do you think? :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good suggestion! DOne.
| Type *ExpRetTy = | ||
| VectorType::get(ArgTy, cast<VectorType>(RetTy)->getElementCount()); | ||
| IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, | ||
| FastMathFlags()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could this reuse FMF declared above?
paulwalker-arm
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree with @rj-jesus's FMF suggestion but otherwise this PR looks good to me.
I recently realised that we return an invalid cost when requesting
the type-based cost for the get.active.lane.mask intrinsic. I've
fixed that in this patch by reusing the existing code for the
non-type-based model.