Skip to content

Commit 320fe44

Browse files
committed
[LV] Don't consider VPValues without underlying value as generating vectors
In some cases during tail folding, the plan won't actually vectorize any values from the original IR, but it may still vectorize e.g. VPWidenCanonicalIVRecipe because it's used for generating the active lane mask. This is enough for willGenerateVectors to consider the VPlan as generating vectors, even though everything useful is scalarized. This patch fixes this by first checking that the VPValue being inspected has an underlying value set. Because some vectorized recipes may be hoisted into the preheader via LICM, we also need to start the search in the preheader to prevent regressions. This also means we now scan the original scalar loop, so we need to handle VPIRInstructionSC.
1 parent 6abe4d9 commit 320fe44

File tree

5 files changed

+66
-239
lines changed

5 files changed

+66
-239
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4172,7 +4172,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41724172
// Set of already visited types.
41734173
DenseSet<Type *> Visited;
41744174
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
4175-
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
4175+
vp_depth_first_deep(Plan.getVectorPreheader()))) {
41764176
for (VPRecipeBase &R : *VPBB) {
41774177
if (EphemeralRecipes.contains(&R))
41784178
continue;
@@ -4192,6 +4192,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
41924192
case VPDef::VPEVLBasedIVPHISC:
41934193
case VPDef::VPPredInstPHISC:
41944194
case VPDef::VPBranchOnMaskSC:
4195+
case VPDef::VPIRInstructionSC:
41954196
continue;
41964197
case VPDef::VPReductionSC:
41974198
case VPDef::VPActiveLaneMaskPHISC:
@@ -4247,6 +4248,9 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
42474248
// operand.
42484249
VPValue *ToCheck =
42494250
R.getNumDefinedValues() >= 1 ? R.getVPValue(0) : R.getOperand(1);
4251+
// Don't consider values that didn't come from the original scalar IR.
4252+
if (!ToCheck->getUnderlyingValue())
4253+
continue;
42504254
Type *ScalarTy = TypeInfo.inferScalarType(ToCheck);
42514255
if (!Visited.insert({ScalarTy}).second)
42524256
continue;

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 55 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -737,12 +737,15 @@ exit:
737737
ret void
738738
}
739739

740-
define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
740+
define void @exit_cond_zext_iv(ptr %dst, i64 %N) #0 {
741741
; DEFAULT-LABEL: define void @exit_cond_zext_iv(
742-
; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
742+
; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
743743
; DEFAULT-NEXT: [[ENTRY:.*]]:
744744
; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
745-
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
745+
; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
746+
; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP7]], 8
747+
; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
748+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], [[TMP8]]
746749
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
747750
; DEFAULT: [[VECTOR_SCEVCHECK]]:
748751
; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
@@ -754,18 +757,23 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
754757
; DEFAULT-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
755758
; DEFAULT-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
756759
; DEFAULT: [[VECTOR_PH]]:
757-
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
760+
; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
761+
; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
762+
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], [[TMP10]]
758763
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
764+
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
765+
; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 8
759766
; DEFAULT-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
760767
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
761768
; DEFAULT: [[VECTOR_BODY]]:
762769
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
763-
; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
764-
; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
765-
; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
766-
; DEFAULT-NEXT: store i32 0, ptr [[TMP9]], align 8
767-
; DEFAULT-NEXT: store i32 0, ptr [[TMP10]], align 8
768-
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
770+
; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
771+
; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
772+
; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
773+
; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP16]]
774+
; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP14]], align 4
775+
; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP17]], align 4
776+
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
769777
; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
770778
; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
771779
; DEFAULT: [[MIDDLE_BLOCK]]:
@@ -778,8 +786,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
778786
; DEFAULT: [[LOOP]]:
779787
; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
780788
; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
781-
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
782-
; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 8
789+
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
790+
; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 4
783791
; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
784792
; DEFAULT-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
785793
; DEFAULT-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
@@ -788,63 +796,56 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
788796
; DEFAULT-NEXT: ret void
789797
;
790798
; PRED-LABEL: define void @exit_cond_zext_iv(
791-
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
799+
; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
792800
; PRED-NEXT: [[ENTRY:.*]]:
793801
; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
794802
; PRED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
795803
; PRED: [[VECTOR_SCEVCHECK]]:
796804
; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
797805
; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
798-
; PRED-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
799-
; PRED-NEXT: [[TMP3:%.*]] = add i32 1, [[TMP2]]
800-
; PRED-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP3]], 1
801-
; PRED-NEXT: [[TMP5:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
802-
; PRED-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
803-
; PRED-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
806+
; PRED-NEXT: [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
807+
; PRED-NEXT: [[TMP2:%.*]] = add i32 1, [[TMP1]]
808+
; PRED-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 1
809+
; PRED-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
810+
; PRED-NEXT: [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
811+
; PRED-NEXT: br i1 [[TMP5]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
804812
; PRED: [[VECTOR_PH]]:
805-
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
806-
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
813+
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
814+
; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
815+
; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
816+
; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], [[TMP8]]
817+
; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
807818
; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
808-
; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
809-
; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
810-
; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
819+
; PRED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
820+
; PRED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
821+
; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
822+
; PRED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
823+
; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[UMAX1]], [[TMP12]]
824+
; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[UMAX1]], [[TMP12]]
825+
; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0
826+
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX1]])
811827
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
812828
; PRED: [[VECTOR_BODY]]:
813-
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
814-
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
815-
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
816-
; PRED-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
817-
; PRED-NEXT: [[TMP7:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT3]]
818-
; PRED-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
819-
; PRED-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
820-
; PRED: [[PRED_STORE_IF]]:
821-
; PRED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
822-
; PRED-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP9]], i32 2
823-
; PRED-NEXT: store i32 0, ptr [[TMP10]], align 8
824-
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
825-
; PRED: [[PRED_STORE_CONTINUE]]:
826-
; PRED-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
827-
; PRED-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
828-
; PRED: [[PRED_STORE_IF4]]:
829-
; PRED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
830-
; PRED-NEXT: [[TMP13:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP12]], i32 2
831-
; PRED-NEXT: store i32 0, ptr [[TMP13]], align 8
832-
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
833-
; PRED: [[PRED_STORE_CONTINUE5]]:
834-
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
835-
; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
836-
; PRED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
829+
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
830+
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
831+
; PRED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
832+
; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
833+
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
834+
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
835+
; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
836+
; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
837+
; PRED-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
837838
; PRED: [[MIDDLE_BLOCK]]:
838839
; PRED-NEXT: br label %[[EXIT:.*]]
839840
; PRED: [[SCALAR_PH]]:
840841
; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
841-
; PRED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
842+
; PRED-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
842843
; PRED-NEXT: br label %[[LOOP:.*]]
843844
; PRED: [[LOOP]]:
844845
; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
845-
; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
846-
; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
847-
; PRED-NEXT: store i32 0, ptr [[GEP]], align 8
846+
; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
847+
; PRED-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
848+
; PRED-NEXT: store i32 0, ptr [[GEP]], align 4
848849
; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
849850
; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
850851
; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
@@ -858,8 +859,8 @@ entry:
858859
loop:
859860
%iv.1 = phi i32 [ 0, %entry ], [ %iv.1.next, %loop ]
860861
%iv.conv = phi i64 [ 0, %entry ], [ %iv.ext, %loop ]
861-
%gep = getelementptr {[100 x i32], i32, i32}, ptr %dst, i64 %iv.conv, i32 2
862-
store i32 0, ptr %gep, align 8
862+
%gep = getelementptr i32, ptr %dst, i64 %iv.conv
863+
store i32 0, ptr %gep, align 4
863864
%iv.1.next = add i32 %iv.1, 1
864865
%iv.ext = zext i32 %iv.1.next to i64
865866
%c = icmp ult i64 %iv.ext, %N

llvm/test/Transforms/LoopVectorize/AArch64/no-vector-instructions.ll

Lines changed: 2 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -5,59 +5,14 @@ define void @f(ptr %p, i64 %x, i64 %n) {
55
; CHECK-LABEL: define void @f(
66
; CHECK-SAME: ptr [[P:%.*]], i64 [[X:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
8-
; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[N]], 1
9-
; CHECK-NEXT: [[TMP1:%.*]] = add nuw i64 [[TMP0]], 1
10-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
11-
; CHECK: [[VECTOR_SCEVCHECK]]:
12-
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[N]] to i1
13-
; CHECK-NEXT: [[TMP3:%.*]] = zext i1 [[TMP2]] to i64
14-
; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
15-
; CHECK: [[VECTOR_PH]]:
16-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP1]], 1
17-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
18-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
19-
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP1]], 1
20-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
21-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
22-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
23-
; CHECK: [[VECTOR_BODY]]:
24-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE4:.*]] ]
25-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
26-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
27-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
28-
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1>
29-
; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT]]
30-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
31-
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
32-
; CHECK: [[PRED_STORE_IF]]:
33-
; CHECK-NEXT: [[IV:%.*]] = add i64 [[OFFSET_IDX]], 0
34-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV]]
35-
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
36-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
37-
; CHECK: [[PRED_STORE_CONTINUE]]:
38-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
39-
; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4]]
40-
; CHECK: [[PRED_STORE_IF3]]:
41-
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
42-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP9]]
43-
; CHECK-NEXT: store i64 [[TMP9]], ptr [[TMP10]], align 8
44-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE4]]
45-
; CHECK: [[PRED_STORE_CONTINUE4]]:
46-
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
47-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
48-
; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
49-
; CHECK: [[MIDDLE_BLOCK]]:
50-
; CHECK-NEXT: br label %[[EXIT:.*]]
51-
; CHECK: [[SCALAR_PH]]:
52-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
538
; CHECK-NEXT: br label %[[LOOP:.*]]
549
; CHECK: [[LOOP]]:
55-
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
10+
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
5611
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i64, ptr [[P]], i64 [[IV1]]
5712
; CHECK-NEXT: store i64 [[IV1]], ptr [[GEP1]], align 8
5813
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2
5914
; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV1]], [[N]]
60-
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
15+
; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT:.*]], label %[[LOOP]]
6116
; CHECK: [[EXIT]]:
6217
; CHECK-NEXT: ret void
6318
;

llvm/test/Transforms/LoopVectorize/SystemZ/force-target-instruction-cost.ll

Lines changed: 2 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -7,47 +7,14 @@ define void @test_scalar_steps_target_instruction_cost(ptr %dst) {
77
; CHECK-LABEL: define void @test_scalar_steps_target_instruction_cost(
88
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
99
; CHECK-NEXT: [[ENTRY:.*]]:
10-
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
11-
; CHECK: [[VECTOR_PH]]:
12-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
13-
; CHECK: [[VECTOR_BODY]]:
14-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ]
15-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3
16-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
17-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
18-
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
19-
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <2 x i64> [[VEC_IV]], splat (i64 8)
20-
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 0
21-
; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
22-
; CHECK: [[PRED_STORE_IF]]:
23-
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0
24-
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP2]]
25-
; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP3]], align 8
26-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
27-
; CHECK: [[PRED_STORE_CONTINUE]]:
28-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
29-
; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2]]
30-
; CHECK: [[PRED_STORE_IF1]]:
31-
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 3
32-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[TMP5]]
33-
; CHECK-NEXT: store i64 [[TMP5]], ptr [[TMP6]], align 8
34-
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE2]]
35-
; CHECK: [[PRED_STORE_CONTINUE2]]:
36-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
37-
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10
38-
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
39-
; CHECK: [[MIDDLE_BLOCK]]:
40-
; CHECK-NEXT: br label %[[EXIT:.*]]
41-
; CHECK: [[SCALAR_PH]]:
42-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ]
4310
; CHECK-NEXT: br label %[[LOOP:.*]]
4411
; CHECK: [[LOOP]]:
45-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
12+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
4613
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
4714
; CHECK-NEXT: store i64 [[IV]], ptr [[GEP]], align 8
4815
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 3
4916
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[IV]], 22
50-
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
17+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
5118
; CHECK: [[EXIT]]:
5219
; CHECK-NEXT: ret void
5320
;
@@ -65,9 +32,3 @@ loop:
6532
exit:
6633
ret void
6734
}
68-
;.
69-
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
70-
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
71-
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
72-
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
73-
;.

0 commit comments

Comments
 (0)