Skip to content

Commit 5571824

Browse files
- Removed tail-folding changes
- Moved cost-model related changes to new PR - Added optsize test to sve-wide-lane-mask.ll
1 parent 442a7d1 commit 5571824

File tree

5 files changed

+134
-182
lines changed

5 files changed

+134
-182
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,10 +209,9 @@ struct TailFoldingInfo {
209209
TargetLibraryInfo *TLI;
210210
LoopVectorizationLegality *LVL;
211211
InterleavedAccessInfo *IAI;
212-
bool UseWideLaneMask;
213212
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
214-
InterleavedAccessInfo *IAI, bool UseWideLaneMask = false)
215-
: TLI(TLI), LVL(LVL), IAI(IAI), UseWideLaneMask(UseWideLaneMask) {}
213+
InterleavedAccessInfo *IAI)
214+
: TLI(TLI), LVL(LVL), IAI(IAI) {}
216215
};
217216

218217
class TargetTransformInfo;

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -957,18 +957,10 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
957957
return TyL.first + ExtraCost;
958958
}
959959
case Intrinsic::get_active_lane_mask: {
960-
auto RetTy = cast<VectorType>(ICA.getReturnType());
961-
EVT RetVT = getTLI()->getValueType(DL, RetTy);
962-
EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
963-
if (RetTy->isScalableTy()) {
964-
if (getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) ||
965-
(!ST->hasSVE2p1() && !ST->hasSME2()) ||
966-
TLI->getTypeAction(RetTy->getContext(), RetVT) !=
967-
TargetLowering::TypeSplitVector)
968-
break;
969-
auto LT = getTypeLegalizationCost(RetTy);
970-
return LT.first / 2;
971-
} else {
960+
auto *RetTy = dyn_cast<FixedVectorType>(ICA.getReturnType());
961+
if (RetTy) {
962+
EVT RetVT = getTLI()->getValueType(DL, RetTy);
963+
EVT OpVT = getTLI()->getValueType(DL, ICA.getArgTypes()[0]);
972964
if (!getTLI()->shouldExpandGetActiveLaneMask(RetVT, OpVT) &&
973965
!getTLI()->isTypeLegal(RetVT)) {
974966
// We don't have enough context at this point to determine if the mask
@@ -980,7 +972,7 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
980972
// NOTE: getScalarizationOverhead returns a cost that's far too
981973
// pessimistic for the actual generated codegen. In reality there are
982974
// two instructions generated per lane.
983-
return cast<FixedVectorType>(RetTy)->getNumElements() * 2;
975+
return RetTy->getNumElements() * 2;
984976
}
985977
}
986978
break;
@@ -6154,11 +6146,8 @@ bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) const {
61546146
if (Required == TailFoldingOpts::Disabled)
61556147
Required |= TailFoldingOpts::Simple;
61566148

6157-
TailFoldingOpts DefaultOpts = ST->getSVETailFoldingDefaultOpts();
6158-
if (TFI->UseWideLaneMask)
6159-
DefaultOpts |= TailFoldingOpts::Simple;
6160-
6161-
if (!TailFoldingOptionLoc.satisfies(DefaultOpts, Required))
6149+
if (!TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(),
6150+
Required))
61626151
return false;
61636152

61646153
// Don't tail-fold for tight loops where we would be better off interleaving

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,6 +1290,11 @@ class LoopVectorizationCostModel {
12901290
return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
12911291
}
12921292

1293+
bool preferPredicatedEpilogue() const {
1294+
return ScalarEpilogueStatus == CM_ScalarEpilogueNotNeededUsePredicate ||
1295+
ScalarEpilogueStatus == CM_ScalarEpilogueNotAllowedUsePredicate;
1296+
}
1297+
12931298
/// Returns the TailFoldingStyle that is best for the current loop.
12941299
TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
12951300
if (!ChosenTailFoldingStyle)
@@ -4531,7 +4536,12 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
45314536
// 3. We don't interleave if we think that we will spill registers to memory
45324537
// due to the increased register pressure.
45334538

4534-
if (!CM.isScalarEpilogueAllowed() && !CM.useWideActiveLaneMask())
4539+
// Only interleave tail-folded loops if wide lane masks requested, as the
4540+
// overhead of multiple instructions to calculate the predicate is likely
4541+
// not beneficial. If a scalar epilogue is not allowed for any other reason,
4542+
// do not interleave.
4543+
if (!CM.isScalarEpilogueAllowed() &&
4544+
!(CM.preferPredicatedEpilogue() && CM.useWideActiveLaneMask()))
45354545
return 1;
45364546

45374547
if (any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
@@ -9008,7 +9018,7 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
90089018
};
90099019

90109020
// 4) if the TTI hook indicates this is profitable, request predication.
9011-
TailFoldingInfo TFI(TLI, &LVL, IAI, EnableWideActiveLaneMask);
9021+
TailFoldingInfo TFI(TLI, &LVL, IAI);
90129022
if (TTI->preferPredicateOverEpilogue(&TFI))
90139023
return CM_ScalarEpilogueNotNeededUsePredicate;
90149024

llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll

Lines changed: 23 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-1
3-
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-2 --check-prefix=CHECK-SVE
4-
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve2p1 | FileCheck %s --check-prefix=CHECK-VSCALE-2 --check-prefix=CHECK-SVE2p1-OR-SME2
5-
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sme2 | FileCheck %s --check-prefix=CHECK-VSCALE-2 --check-prefix=CHECK-SVE2p1-OR-SME2
3+
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -mcpu=neoverse-v1 -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=CHECK-VSCALE-2
64
; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all 2>&1 -intrinsic-cost-strategy=type-based-intrinsic-cost -disable-output -S -mtriple=aarch64--linux-gnu -mattr=+sve | FileCheck %s --check-prefix=TYPE_BASED_ONLY
75

86
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -922,7 +920,6 @@ define void @get_lane_mask() #0 {
922920
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
923921
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
924922
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
925-
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
926923
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
927924
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
928925
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
@@ -937,53 +934,28 @@ define void @get_lane_mask() #0 {
937934
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
938935
; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
939936
;
940-
; CHECK-SVE-LABEL: 'get_lane_mask'
941-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
942-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
943-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
944-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
945-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
946-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
947-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
948-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
949-
; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
950-
; CHECK-SVE-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
951-
; CHECK-SVE-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
952-
; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
953-
; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
954-
; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
955-
; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
956-
; CHECK-SVE-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
957-
; CHECK-SVE-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
958-
; CHECK-SVE-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
959-
; CHECK-SVE-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
960-
; CHECK-SVE-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
961-
; CHECK-SVE-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
962-
; CHECK-SVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
963-
;
964-
; CHECK-SVE2p1-OR-SME2-LABEL: 'get_lane_mask'
965-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
966-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
967-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
968-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
969-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
970-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
971-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
972-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
973-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
974-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
975-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
976-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
977-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
978-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
979-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
980-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
981-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
982-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
983-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
984-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
985-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
986-
; CHECK-SVE2p1-OR-SME2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
937+
; CHECK-VSCALE-2-LABEL: 'get_lane_mask'
938+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
939+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison)
940+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison)
941+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison)
942+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison)
943+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
944+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
945+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
946+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
947+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
948+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
949+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison)
950+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison)
951+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison)
952+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison)
953+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison)
954+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison)
955+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison)
956+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison)
957+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison)
958+
; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
987959
;
988960
; TYPE_BASED_ONLY-LABEL: 'get_lane_mask'
989961
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison)
@@ -994,7 +966,6 @@ define void @get_lane_mask() #0 {
994966
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison)
995967
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
996968
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
997-
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
998969
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
999970
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
1000971
; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison)
@@ -1019,7 +990,6 @@ define void @get_lane_mask() #0 {
1019990
%mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison)
1020991
%mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison)
1021992

1022-
%mask_nxv64i1_i64 = call <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64 poison, i64 poison)
1023993
%mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison)
1024994
%mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison)
1025995

@@ -1446,7 +1416,6 @@ declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32, i32)
14461416
declare <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32, i32)
14471417
declare <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32, i32)
14481418
declare <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32, i32)
1449-
declare <vscale x 64 x i1> @llvm.get.active.lane.mask.nxv64i1.i64(i64, i64)
14501419
declare <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64, i64)
14511420
declare <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16, i16)
14521421
declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64, i64)

0 commit comments

Comments
 (0)