Skip to content

Commit 71be13a

Browse files
committed
[VPlan] Rewrite VPExpandSCEVExprs in replaceSymbolicStrides.
Extend replaceSymbolicStrides to also replace SCEVUnknowns in VPExpandSCEVExprs using the information from StridesMaps. This results in simpler SCEV expansions in some cases.
1 parent 48a6f2f commit 71be13a

File tree

4 files changed

+42
-42
lines changed

4 files changed

+42
-42
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2853,6 +2853,7 @@ void VPlanTransforms::replaceSymbolicStrides(
28532853
return R->getParent()->getParent() ||
28542854
R->getParent() == Plan.getVectorLoopRegion()->getSinglePredecessor();
28552855
};
2856+
ValueToSCEVMapTy RewriteMap;
28562857
for (const SCEV *Stride : StridesMap.values()) {
28572858
using namespace SCEVPatternMatch;
28582859
auto *StrideV = cast<SCEVUnknown>(Stride)->getValue();
@@ -2880,6 +2881,22 @@ void VPlanTransforms::replaceSymbolicStrides(
28802881
VPValue *CI = Plan.getOrAddLiveIn(ConstantInt::get(U->getType(), C));
28812882
StrideVPV->replaceUsesWithIf(CI, CanUseVersionedStride);
28822883
}
2884+
RewriteMap[StrideV] = PSE.getSCEV(StrideV);
2885+
}
2886+
2887+
for (VPRecipeBase &R : *Plan.getEntry()) {
2888+
auto *ExpSCEV = dyn_cast<VPExpandSCEVRecipe>(&R);
2889+
if (!ExpSCEV)
2890+
continue;
2891+
const SCEV *ScevExpr = ExpSCEV->getSCEV();
2892+
auto *NewSCEV =
2893+
SCEVParameterRewriter::rewrite(ScevExpr, *PSE.getSE(), RewriteMap);
2894+
if (NewSCEV != ScevExpr) {
2895+
VPValue *NewExp = vputils::getOrCreateVPValueForSCEVExpr(Plan, NewSCEV);
2896+
ExpSCEV->replaceAllUsesWith(NewExp);
2897+
if (Plan.getTripCount() == ExpSCEV)
2898+
Plan.resetTripCount(NewExp);
2899+
}
28832900
}
28842901
}
28852902

llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,14 +220,18 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog
220220
; CHECK-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[STEP]], i32 1)
221221
; CHECK-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[UMAX]]
222222
; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP6]], [[TMP8]]
223-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 2
223+
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[INDVAR_LCSSA1]], 2
224+
; CHECK-NEXT: [[SMAX1:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP12]], i32 0)
225+
; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP3]], -1
226+
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[SMAX1]], [[TMP14]]
227+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP15]], 2
224228
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
225229
; CHECK: [[VECTOR_SCEVCHECK]]:
226230
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], 1
227231
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
228232
; CHECK: [[VECTOR_PH]]:
229-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 2
230-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]]
233+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP15]], 2
234+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP15]], [[N_MOD_VF]]
231235
; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[IV_1_LCSSA]], [[N_VEC]]
232236
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
233237
; CHECK: [[VECTOR_BODY]]:
@@ -239,7 +243,7 @@ define void @expand_diff_scev_unknown(ptr %dst, i1 %invar.c, i32 %step) mustprog
239243
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
240244
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
241245
; CHECK: [[MIDDLE_BLOCK]]:
242-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]]
246+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP15]], [[N_VEC]]
243247
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
244248
; CHECK: [[SCALAR_PH]]:
245249
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ [[IV_1_LCSSA]], %[[LOOP_2_PREHEADER]] ], [ [[IV_1_LCSSA]], %[[VECTOR_SCEVCHECK]] ]

llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll

Lines changed: 11 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,11 @@ define void @test_versioned_with_sext_use(i32 %offset, ptr %dst) {
2222
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
2323
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2424
; CHECK: vector.ph:
25-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
26-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
25+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
2726
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
2827
; CHECK: vector.body:
2928
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
30-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
31-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
29+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
3230
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
3331
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8
3432
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -94,13 +92,11 @@ define void @test_versioned_with_zext_use(i32 %offset, ptr %dst) {
9492
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
9593
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
9694
; CHECK: vector.ph:
97-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
98-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
95+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
9996
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
10097
; CHECK: vector.body:
10198
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
102-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
103-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
99+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
104100
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[DST]], i64 [[TMP3]]
105101
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP4]], align 8
106102
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -233,13 +229,11 @@ define void @test_versioned_with_different_uses(i32 %offset, ptr noalias %dst.1,
233229
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i32 [[OFFSET]], 1
234230
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
235231
; CHECK: vector.ph:
236-
; CHECK-NEXT: [[TMP0:%.*]] = mul i64 200, [[OFFSET_EXT]]
237-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], [[TMP0]]
232+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[IV_1]], 200
238233
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
239234
; CHECK: vector.body:
240235
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
241-
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[INDEX]], [[OFFSET_EXT]]
242-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[TMP1]]
236+
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[IV_1]], [[INDEX]]
243237
; CHECK-NEXT: [[OFFSET_IDX2:%.*]] = trunc i64 [[INDEX]] to i32
244238
; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX2]], 0
245239
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX2]], 1
@@ -414,26 +408,20 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
414408
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
415409
; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
416410
; CHECK: vector.ph:
417-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 4
418-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
419-
; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], [[G_64]]
420411
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
421412
; CHECK: vector.body:
422413
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
423-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[G_64]]
424-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[OFFSET_IDX]]
414+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]]
425415
; CHECK-NEXT: store <4 x i16> splat (i16 1), ptr [[TMP4]], align 2
426416
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
427-
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
428-
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
417+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
418+
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
429419
; CHECK: middle.block:
430-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
431-
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
420+
; CHECK-NEXT: br label [[EXIT:%.*]]
432421
; CHECK: scalar.ph:
433-
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
434422
; CHECK-NEXT: br label [[LOOP:%.*]]
435423
; CHECK: loop:
436-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
424+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
437425
; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[IV]]
438426
; CHECK-NEXT: store i16 [[G_16]], ptr [[GEP]], align 2
439427
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], [[G_64]]

llvm/test/Transforms/PhaseOrdering/AArch64/indvars-vectorization.ll

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,9 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
1414
; CHECK-NEXT: [[SUB:%.*]] = add i32 [[XA]], -1
1515
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[SUB]] to i64
1616
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[XB]] to i64
17-
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[TMP1]], [[TMP0]]
18-
; CHECK-NEXT: [[SMAX7:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP2]], i64 32000)
19-
; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i64 [[TMP2]], 32000
20-
; CHECK-NEXT: [[UMIN8:%.*]] = zext i1 [[TMP3]] to i64
21-
; CHECK-NEXT: [[TMP4:%.*]] = add nsw i64 [[TMP2]], [[UMIN8]]
22-
; CHECK-NEXT: [[TMP5:%.*]] = sub i64 [[SMAX7]], [[TMP4]]
23-
; CHECK-NEXT: [[UMAX9:%.*]] = tail call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1)
24-
; CHECK-NEXT: [[TMP6:%.*]] = udiv i64 [[TMP5]], [[UMAX9]]
25-
; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], [[UMIN8]]
26-
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 1
17+
; CHECK-NEXT: [[TMP2:%.*]] = tail call i64 @llvm.smax.i64(i64 [[TMP0]], i64 31999)
18+
; CHECK-NEXT: [[SMAX10:%.*]] = add nuw nsw i64 [[TMP2]], 1
19+
; CHECK-NEXT: [[TMP8:%.*]] = sub i64 [[SMAX10]], [[TMP0]]
2720
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ugt i64 [[TMP8]], 23
2821
; CHECK-NEXT: [[IDENT_CHECK_NOT:%.*]] = icmp eq i32 [[XB]], 1
2922
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[MIN_ITERS_CHECK]], [[IDENT_CHECK_NOT]]
@@ -50,13 +43,11 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
5043
; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[FOR_BODY_PREHEADER13]], label [[VECTOR_PH:%.*]]
5144
; CHECK: vector.ph:
5245
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP8]], -8
53-
; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[N_VEC]], [[TMP1]]
54-
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[TMP18]], [[TMP0]]
46+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[N_VEC]], [[TMP0]]
5547
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
5648
; CHECK: vector.body:
5749
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
58-
; CHECK-NEXT: [[TMP19:%.*]] = mul nuw i64 [[INDEX]], [[TMP1]]
59-
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[TMP19]], [[TMP0]]
50+
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[INDEX]], [[TMP0]]
6051
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[OFFSET_IDX]]
6152
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP20]], i64 16
6253
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP20]], align 4, !alias.scope [[META0:![0-9]+]]
@@ -75,7 +66,7 @@ define void @s172(i32 noundef %xa, i32 noundef %xb, ptr noundef %a, ptr noundef
7566
; CHECK: middle.block:
7667
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP8]], [[N_VEC]]
7768
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER13]]
78-
; CHECK: for.body.preheader13:
69+
; CHECK: for.body.preheader14:
7970
; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_BODY_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
8071
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
8172
; CHECK: for.body:

0 commit comments

Comments
 (0)