Skip to content

Commit a55fe62

Browse files
committed
Simplify ScalarIVSteps
1 parent 6dc356d commit a55fe62

File tree

6 files changed

+23
-11
lines changed

6 files changed

+23
-11
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7652,7 +7652,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
76527652
VPlanTransforms::unrollByUF(BestVPlan, BestUF,
76537653
OrigLoop->getHeader()->getContext());
76547654
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
7655-
VPlanTransforms::convertToConcreteRecipes(BestVPlan);
7655+
VPlanTransforms::simplifyRecipes(BestVPlan, Legal->getWidestInductionType());
7656+
VPlanTransforms::removeDeadRecipes(BestVPlan);
7657+
VPlanTransforms::convertToConcreteRecipes(BestVPlan, Legal->getWidestInductionType());
76567658

76577659
// Perform the actual loop transformation.
76587660
VPTransformState State(&TTI, BestVF, BestUF, LI, DT, ILV.Builder, &ILV,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4026,6 +4026,10 @@ class VPlan {
40264026
UFs.insert(UF);
40274027
}
40284028

4029+
unsigned hasSingleUF() const {
4030+
return UFs.size() == 1;
4031+
}
4032+
40294033
/// Return a string with the name of the plan and the applicable VFs and UFs.
40304034
std::string getName() const;
40314035

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -888,6 +888,14 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
888888
return;
889889
}
890890

891+
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&R)) {
892+
if (Steps->getParent()->getPlan()->hasSingleUF() && Steps->getNumOperands() == 2 &&
893+
vputils::onlyFirstLaneUsed(Steps)) {
894+
Steps->replaceAllUsesWith(Steps->getOperand(0));
895+
return;
896+
}
897+
}
898+
891899
VPValue *A;
892900
if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
893901
VPValue *Trunc = R.getVPSingleValue();
@@ -964,7 +972,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
964972

965973
/// Try to simplify the recipes in \p Plan. Use \p CanonicalIVTy as type for all
966974
/// un-typed live-ins in VPTypeAnalysis.
967-
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy) {
975+
void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy) {
968976
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
969977
Plan.getEntry());
970978
VPTypeAnalysis TypeInfo(CanonicalIVTy);
@@ -1041,7 +1049,6 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
10411049
}
10421050

10431051
Term->eraseFromParent();
1044-
VPlanTransforms::removeDeadRecipes(Plan);
10451052

10461053
Plan.setVF(BestVF);
10471054
Plan.setUF(BestUF);

llvm/lib/Transforms/Vectorize/VPlanTransforms.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ struct VPlanTransforms {
138138
/// Lower abstract recipes to concrete ones, that can be codegen'd.
139139
static void convertToConcreteRecipes(VPlan &Plan);
140140

141+
/// Try to simplify the recipes in \p Plan. Use \p CanonicalIVTy as type for
/// all un-typed live-ins in VPTypeAnalysis.
static void simplifyRecipes(VPlan &Plan, Type *CanonicalIVTy);
142+
141143
/// If there's a single exit block, optimize its phi recipes that use exiting
142144
/// IV values by feeding them precomputed end values instead, possibly taken
143145
/// one step backwards.

llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,12 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
1515
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1616
; CHECK: vector.body:
1717
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
18-
; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
19-
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[TMP0]] to i64
18+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
2019
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
2120
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0
2221
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP3]], align 4
2322
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
24-
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[TMP0]] to i64
23+
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
2524
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
2625
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0
2726
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP7]], align 4
@@ -51,7 +50,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
5150
; CHECK: for.inc:
5251
; CHECK-NEXT: [[INC]] = add nsw i32 [[I_02]], 1
5352
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[INC]], [[LEN]]
54-
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]], !llvm.loop [[LOOP2:![0-9]+]]
53+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE]], !llvm.loop [[LOOP3:![0-9]+]]
5554
; CHECK: for.end:
5655
; CHECK-NEXT: ret void
5756
;

llvm/test/Transforms/LoopVectorize/pointer-induction.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,8 +144,7 @@ define void @pointer_induction_used_as_vector(ptr noalias %start.1, ptr noalias
144144
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
145145
; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> <i64 0, i64 1, i64 2, i64 3>
146146
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
147-
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 0
148-
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP1]]
147+
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[OFFSET_IDX]]
149148
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i64 1
150149
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr ptr, ptr [[NEXT_GEP]], i32 0
151150
; CHECK-NEXT: store <4 x ptr> [[TMP2]], ptr [[TMP3]], align 8
@@ -240,8 +239,7 @@ define void @non_constant_vector_expansion(i32 %0, ptr %call) {
240239
; STRIDED-NEXT: [[TMP4:%.*]] = mul <4 x i64> <i64 0, i64 1, i64 2, i64 3>, [[DOTSPLAT]]
241240
; STRIDED-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i64> [[TMP4]]
242241
; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
243-
; STRIDED-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
244-
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[TMP5]]
242+
; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr ptr, ptr [[CALL:%.*]], i32 [[OFFSET_IDX]]
245243
; STRIDED-NEXT: [[TMP7:%.*]] = getelementptr ptr, ptr [[TMP6]], i32 0
246244
; STRIDED-NEXT: store <4 x ptr> [[VECTOR_GEP]], ptr [[TMP7]], align 4
247245
; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4

0 commit comments

Comments
 (0)