diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 563953516a354..bde19de0c87f5 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -1986,27 +1986,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { } return Cost; } - case Intrinsic::get_active_lane_mask: { - EVT ResVT = getTLI()->getValueType(DL, RetTy, true); - EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true); - - // If we're not expanding the intrinsic then we assume this is cheap - // to implement. - if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) { - return getTypeLegalizationCost(RetTy).first; - } - - // Create the expanded types that will be used to calculate the uadd_sat - // operation. - Type *ExpRetTy = VectorType::get( - ICA.getArgTypes()[0], cast(RetTy)->getElementCount()); - IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF); - InstructionCost Cost = - thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind); - Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy, - CmpInst::ICMP_ULT, CostKind); - return Cost; - } case Intrinsic::experimental_cttz_elts: { EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true); @@ -2054,6 +2033,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return Cost; } + case Intrinsic::get_active_lane_mask: case Intrinsic::experimental_vector_match: return thisT()->getTypeBasedIntrinsicInstrCost(ICA, CostKind); case Intrinsic::modf: @@ -2394,6 +2374,27 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { thisT()->getArithmeticInstrCost(BinaryOperator::And, RetTy, CostKind); return Cost; } + case Intrinsic::get_active_lane_mask: { + Type *ArgTy = ICA.getArgTypes()[0]; + EVT ResVT = getTLI()->getValueType(DL, RetTy, true); + EVT ArgVT = getTLI()->getValueType(DL, ArgTy, true); + + // If we're not expanding the intrinsic then we assume this is cheap + // to implement. + if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgVT)) + return getTypeLegalizationCost(RetTy).first; + + // Create the expanded types that will be used to calculate the uadd_sat + // operation. + Type *ExpRetTy = + VectorType::get(ArgTy, cast(RetTy)->getElementCount()); + IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF); + InstructionCost Cost = + thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind); + Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy, + CmpInst::ICMP_ULT, CostKind); + return Cost; + } case Intrinsic::abs: ISD = ISD::ABS; break; diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 0bf776b5c97e3..265ff89b36050 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -958,16 +958,16 @@ define void @get_lane_mask() #0 { ; CHECK-VSCALE-2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv2i1_i32 = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i64 = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i64 = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i64 = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv16i1_i32 = call @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv8i1_i32 = call @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv4i1_i32 = call @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask_nxv2i1_i32 = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_nxv32i1_i64 = call @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_nxv16i1_i16 = call @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) @@ -976,8 +976,8 @@ define void @get_lane_mask() #0 { ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %mask_nxv16i1_i64 = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef)