-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[SCEV] Try to re-use pointer LCSSA phis when expanding SCEVs. #147824
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesGeneralize the code added in A common source of integer AddRecs with pointer bases are runtime checks This improves codegen in some cases when vectorizing and prevents Full diff: https://github.com/llvm/llvm-project/pull/147824.diff 5 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index a101151eed7cc..39fef921a9590 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -530,6 +530,7 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
bool isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, const Loop *L);
+ Value *tryToReuseLCSSAPhi(const SCEVAddRecExpr *S);
Value *expandAddRecExprLiterally(const SCEVAddRecExpr *);
PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const Loop *L, Type *&TruncTy,
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 24fe08d6c3e4e..ca9183b91a19b 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1223,6 +1223,39 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
return Result;
}
+Value *SCEVExpander::tryToReuseLCSSAPhi(const SCEVAddRecExpr *S) {
+ Type *STy = S->getType();
+ const Loop *L = S->getLoop();
+ BasicBlock *EB = L->getExitBlock();
+ if (!EB || !EB->getSinglePredecessor() ||
+ !SE.DT.dominates(EB, Builder.GetInsertBlock()))
+ return nullptr;
+
+ for (auto &PN : EB->phis()) {
+ if (!SE.isSCEVable(PN.getType()))
+ continue;
+ auto *ExitSCEV = SE.getSCEV(&PN);
+ Type *PhiTy = PN.getType();
+ if (STy->isIntegerTy() && PhiTy->isPointerTy())
+ ExitSCEV= SE.getPtrToIntExpr(ExitSCEV, STy);
+ else if (S->getType() != PN.getType())
+ continue;
+ const SCEV *Diff = SE.getMinusSCEV(S, ExitSCEV);
+ if (isa<SCEVCouldNotCompute>(Diff) ||
+ SCEVExprContains(Diff,
+ [](const SCEV *S) { return isa<SCEVAddRecExpr>(S); }))
+ continue;
+
+ Value *DiffV = expand(Diff);
+ Value *BaseV = &PN;
+ if (DiffV->getType()->isIntegerTy() && PhiTy->isPointerTy())
+ BaseV = Builder.CreatePtrToInt(BaseV, DiffV->getType());
+ return Builder.CreateAdd(BaseV, DiffV);
+ }
+
+ return nullptr;
+}
+
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// In canonical mode we compute the addrec as an expression of a canonical IV
// using evaluateAtIteration and expand the resulting SCEV expression. This
@@ -1262,6 +1295,11 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
return V;
}
+ // If there S is expanded outside the defining loop, check if there is a
+ // matching LCSSA phi node for it.
+ if (Value *V = tryToReuseLCSSAPhi(S))
+ return V;
+
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
if (isa<PointerType>(S->getType())) {
diff --git a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
index 10e10653a431d..3ad262bb20910 100644
--- a/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
+++ b/llvm/test/Transforms/LoopLoadElim/invalidate-laa-after-versioning.ll
@@ -59,19 +59,16 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_1]], i64 1
; CHECK-NEXT: br label [[INNER_2:%.*]]
; CHECK: inner.2:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[INNER_2]] ], [ 0, [[INNER_1_EXIT]] ]
; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[GEP_5]], [[INNER_1_EXIT]] ], [ [[PTR_IV_2_NEXT:%.*]], [[INNER_2]] ]
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_2]], i64 1
-; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 false, label [[INNER_3_LVER_CHECK:%.*]], label [[INNER_2]]
; CHECK: inner.3.lver.check:
-; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], [[INNER_2]] ]
; CHECK-NEXT: [[LCSSA_PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2]], [[INNER_2]] ]
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 1
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_2]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24
-; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[LCSSA_PTR_IV_2]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 16
+; CHECK-NEXT: [[SCEVGEP3:%.*]] = inttoptr i64 [[TMP1]] to ptr
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP3]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
@@ -104,7 +101,7 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT4:%.*]], label [[INNER_3]]
; CHECK: outer.latch.loopexit:
; CHECK-NEXT: br label [[OUTER_LATCH]]
-; CHECK: outer.latch.loopexit4:
+; CHECK: outer.latch.loopexit3:
; CHECK-NEXT: br label [[OUTER_LATCH]]
; CHECK: outer.latch:
; CHECK-NEXT: br label [[INNER_1_LVER_CHECK]]
diff --git a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
index 2747895f06a7b..cd2e5dd8055f2 100644
--- a/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll
@@ -18,11 +18,9 @@ define void @reuse_lcssa_phi_for_add_rec1(ptr %head) {
; CHECK-NEXT: [[IV_NEXT]] = add nuw i64 [[IV]], 1
; CHECK-NEXT: br i1 [[EC_1]], label %[[PH:.*]], label %[[LOOP_1]]
; CHECK: [[PH]]:
-; CHECK-NEXT: [[IV_2_LCSSA:%.*]] = phi i32 [ [[IV_2]], %[[LOOP_1]] ]
; CHECK-NEXT: [[IV_LCSSA:%.*]] = phi i64 [ [[IV]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[IV_2_NEXT_LCSSA:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[IV_2_NEXT]], %[[LOOP_1]] ]
; CHECK-NEXT: [[SRC_2:%.*]] = tail call noalias noundef dereferenceable_or_null(8) ptr @calloc(i64 1, i64 8)
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[IV_2_LCSSA]], 1
; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP0]], i32 1)
; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
@@ -106,27 +104,23 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
; CHECK-LABEL: define void @runtime_checks_ptr_inductions(
; CHECK-SAME: ptr [[DST_1:%.*]], ptr [[DST_2:%.*]], i1 [[C:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[DST_11:%.*]] = ptrtoint ptr [[DST_1]] to i64
; CHECK-NEXT: br label %[[LOOP_1:.*]]
; CHECK: [[LOOP_1]]:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], %[[LOOP_1]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[DST_1]], %[[ENTRY]] ], [ [[PTR_IV_1_NEXT:%.*]], %[[LOOP_1]] ]
; CHECK-NEXT: [[CALL:%.*]] = call i32 @val()
; CHECK-NEXT: [[SEL_DST:%.*]] = select i1 [[C]], ptr [[DST_1]], ptr [[DST_2]]
; CHECK-NEXT: [[PTR_IV_1_NEXT]] = getelementptr i8, ptr [[PTR_IV_1]], i64 1
; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i32 [[CALL]], 0
-; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 [[EC_1]], label %[[LOOP_2_HEADER_PREHEADER:.*]], label %[[LOOP_1]]
; CHECK: [[LOOP_2_HEADER_PREHEADER]]:
-; CHECK-NEXT: [[SEL_DST_LCSSA2:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], %[[LOOP_1]] ]
+; CHECK-NEXT: [[SEL_DST_LCSSA1:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
; CHECK-NEXT: [[PTR_IV_1_LCSSA:%.*]] = phi ptr [ [[PTR_IV_1]], %[[LOOP_1]] ]
; CHECK-NEXT: [[SEL_DST_LCSSA:%.*]] = phi ptr [ [[SEL_DST]], %[[LOOP_1]] ]
-; CHECK-NEXT: [[SEL_DST_LCSSA23:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA2]] to i64
+; CHECK-NEXT: [[SEL_DST_LCSSA12:%.*]] = ptrtoint ptr [[SEL_DST_LCSSA1]] to i64
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
; CHECK: [[VECTOR_MEMCHECK]]:
-; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDVAR_LCSSA]], [[DST_11]]
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA23]]
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[PTR_IV_1_LCSSA]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SEL_DST_LCSSA12]]
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -148,13 +142,13 @@ define void @runtime_checks_ptr_inductions(ptr %dst.1, ptr %dst.2, i1 %c) {
; CHECK-NEXT: br label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1023, %[[MIDDLE_BLOCK]] ], [ 1, %[[LOOP_2_HEADER_PREHEADER]] ], [ 1, %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[PTR_IV_1_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[PTR_IV_1_LCSSA]], %[[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL5:%.*]] = phi ptr [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[SEL_DST_LCSSA]], %[[LOOP_2_HEADER_PREHEADER]] ], [ [[SEL_DST_LCSSA]], %[[VECTOR_MEMCHECK]] ]
; CHECK-NEXT: br label %[[LOOP_2_HEADER:.*]]
; CHECK: [[LOOP_2_HEADER]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[DEC7:%.*]], %[[LOOP_2_LATCH:.*]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ]
-; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL6]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[PTR_IV_3_NEXT:%.*]], %[[LOOP_2_LATCH]] ], [ [[BC_RESUME_VAL5]], %[[SCALAR_PH]] ]
; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i32 [[IV]], 1024
; CHECK-NEXT: br i1 [[EC_2]], label %[[EXIT:.*]], label %[[LOOP_2_LATCH]]
; CHECK: [[LOOP_2_LATCH]]:
diff --git a/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll b/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll
index 8075314a65b49..858864276c0a0 100644
--- a/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll
+++ b/llvm/test/Transforms/LoopVersioning/invalidate-laa-after-versioning.ll
@@ -56,19 +56,16 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_1]], i64 1
; CHECK-NEXT: br label [[INNER_2:%.*]]
; CHECK: inner.2:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[INNER_2]] ], [ 0, [[INNER_1_EXIT]] ]
; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[GEP_5]], [[INNER_1_EXIT]] ], [ [[PTR_IV_2_NEXT:%.*]], [[INNER_2]] ]
; CHECK-NEXT: [[PTR_IV_2_NEXT]] = getelementptr inbounds double, ptr [[PTR_IV_2]], i64 1
-; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1
; CHECK-NEXT: br i1 false, label [[INNER_3_LVER_CHECK:%.*]], label [[INNER_2]]
; CHECK: inner.3.lver.check:
-; CHECK-NEXT: [[INDVAR_LCSSA:%.*]] = phi i64 [ [[INDVAR]], [[INNER_2]] ]
; CHECK-NEXT: [[LCSSA_PTR_IV_2:%.*]] = phi ptr [ [[PTR_IV_2]], [[INNER_2]] ]
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds double, ptr [[PTR_PHI]], i64 1
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds double, ptr [[LCSSA_PTR_IV_2]], i64 1
-; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[INDVAR_LCSSA]], 3
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 24
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[LCSSA_PTR_IV_2]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 16
+; CHECK-NEXT: [[SCEVGEP:%.*]] = inttoptr i64 [[TMP1]] to ptr
; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[GEP_7]], [[GEP_1]]
; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR_PHI]], [[SCEVGEP]]
; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
@@ -90,10 +87,10 @@ define void @test(ptr %arg, i64 %arg1) {
; CHECK: inner.3:
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ 0, [[INNER_3_PH]] ], [ [[IV_2_NEXT:%.*]], [[INNER_3]] ]
; CHECK-NEXT: [[GEP_8:%.*]] = getelementptr inbounds double, ptr [[GEP_6]], i64 [[IV_2]]
-; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8, !alias.scope !0, !noalias !3
-; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8, !alias.scope !3
+; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_7]], align 8, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
+; CHECK-NEXT: store double 0.000000e+00, ptr [[GEP_8]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: [[GEP_9:%.*]] = getelementptr double, ptr [[PTR_PHI]], i64 [[IV_2]]
-; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8, !alias.scope !3
+; CHECK-NEXT: [[TMP18:%.*]] = load double, ptr [[GEP_9]], align 8, !alias.scope [[META3]]
; CHECK-NEXT: [[IV_2_NEXT]] = add nuw nsw i64 [[IV_2]], 1
; CHECK-NEXT: [[C_2:%.*]] = icmp eq i64 [[IV_2]], 1
; CHECK-NEXT: br i1 [[C_2]], label [[OUTER_LATCH_LOOPEXIT3:%.*]], label [[INNER_3]]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
7386b29
to
88e170a
Compare
ca5fc2b
to
4b53050
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ping, now that #147214 landed.
This simplifies runtime checks in a number of cases already, and prevents
regressions with #142309, which
turns some phis into single-entry ones, which SCEV will look through
now (and expand the whole AddRec), whereas before it would have to treat
the LCSSA phi as SCEVUnknown.
if (isa<SCEVCouldNotCompute>(Diff) || | ||
SCEVExprContains(Diff, | ||
[](const SCEV *S) { return isa<SCEVAddRecExpr>(S); })) | ||
continue; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we limit this to just constant offsets (and use computeConstantDifference)? This check excludes one particularly bad case, but other complex expansions may also be non-profitable.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed that checking just for not containing add-recs might be a bit over-optimistic. Restricting to constant difference on the other hand would mean we miss other profitable cases.
I added a restricted this now to only allow SCEVConstant/SCEVUnknown values, and PtrToInt/negations of those. This should cover all cases I found for now, on a large test set with vectorization enabled .
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[LCSSA_PTR_IV_1]], i64 [[TMP1]] | ||
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[LCSSA_PTR_IV_2]] to i64 | ||
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 16 | ||
; CHECK-NEXT: [[SCEVGEP:%.*]] = inttoptr i64 [[TMP1]] to ptr |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We really shouldn't be emitting ptrtoint+add+inttoptr. Why do we end up with this instead of a GEP? I'd have expected this to not happen as both the phi and the final result are pointers...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep updated to emit a GEP in that case. If the expression itself is used as int again, doing to ptrtoint + add
is a bit more compact, but there's no easy way to detect this at this point.
Add additional test coverage for #147824.
4b53050
to
535fb71
Compare
Value *DiffV = expand(Diff); | ||
Value *BaseV = &PN; | ||
if (DiffV->getType()->isIntegerTy() && PhiTy->isPointerTy()) | ||
return Builder.CreatePtrAdd(BaseV, DiffV); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't fully understand how the int addrec with ptr phi case works. As far as I can tell this is going to produce a ptradd here, but doesn't the result need to be an integer? I see in the tests that there is a ptrtoint, but I don't understand where it is coming from.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was broken by the patch, the cases in the tests should be pointer AddRecs, that are than convered via PtrToInt.
I added a new test case (llvm/test/Transforms/LoopIdiom/reuse-lcssa-phi-scev-expansion.ll
) which has a pointer phi
, with the integer AddRec. Handle by creating a PtrToInt here, if S
is has integer type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Though actually, do you really need that mixed integer/pointer support? It occurred to me that this isn't going to work for non-integral pointers. I'd rather not have it if it's not critical.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfortuantely a common case where this helps is when the phi is a pointer but the SCEV is of integer type. This is generated by LV to compute and check the distance between 2 pointers (example is in https://github.com/llvm/llvm-project/blob/main/llvm/test/Transforms/LoopVectorize/reuse-lcssa-phi-scev-expansion.ll#L122).
For now we will only generate this for integral pointers, so I'm not sure if there's a good way to write a test for it. I could add a speculative continue to skip non-integral pointers?
Add additional test coverage for llvm/llvm-project#147824.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Generalize the code added in llvm#147214 to also support re-using pointer LCSSA phis when expanding integer SCEVs with AddRecs. A common source of integer AddRecs with pointer bases are runtime checks emitted by LV based on the distance between 2 pointer AddRecs. This improves codegen in some cases when vectorizing and prevents regressions with llvm#142309, which turns some phis into single-entry ones, which SCEV will look through now (and expand the whole AddRec), whereas before it would have to treat the LCSSA phi as SCEVUnknown.
6854a9f
to
a91517b
Compare
…Vs. (#147824) Generalize the code added in llvm/llvm-project#147214 to also support re-using pointer LCSSA phis when expanding SCEVs with AddRecs. A common source of integer AddRecs with pointer bases are runtime checks emitted by LV based on the distance between 2 pointer AddRecs. This improves codegen in some cases when vectorizing and prevents regressions with llvm/llvm-project#142309, which turns some phis into single-entry ones, which SCEV will look through now (and expand the whole AddRec), whereas before it would have to treat the LCSSA phi as SCEVUnknown. Compile-time impact neutral: https://llvm-compile-time-tracker.com/compare.php?from=fd5fc76c91538871771be2c3be2ca3a5f2dcac31&to=ca5fc2b3d8e6efc09f1624a17fdbfbe909f14eb4&stat=instructions:u PR: llvm/llvm-project#147824
Add another test case for #147824, where the difference between an existing phi and the target SCEV is an add of a constant.
Update the logic added in llvm#147824 to also allow adds of constants. There are a number of cases where this can help remove redundant phis and replace some computation with a ptrtoint (which likely is free in the backend).
Add another test case for llvm/llvm-project#147824, where the difference between an existing phi and the target SCEV is an add of a constant.
I am seeing an assertion failure when building the Linux kernel for Hexagon after this change.
struct list_head {
struct list_head *next;
} __list_add_prev_0, *cell_sort_array, *sort_cells_cell, sort_cells_tmp,
process_thin_deferred_cells_cells;
int sort_cells_count, process_thin_deferred_cells___trans_tmp_15,
process_thin_deferred_cells_j, process_thin_deferred_cells_count,
process_deferred_bios_tc;
int list_empty();
int sort_cells(struct list_head *cells) {
sort_cells_cell = ({
void *__mptr = cells;
__mptr;
});
for (; !(sort_cells_cell == 0);)
cell_sort_array[sort_cells_count++] = sort_cells_tmp;
return sort_cells_count;
}
void process_thin_deferred_cells() {
do {
process_thin_deferred_cells_count =
sort_cells(&process_thin_deferred_cells_cells);
if (process_thin_deferred_cells___trans_tmp_15) {
process_thin_deferred_cells_j = 0;
for (; process_thin_deferred_cells_j < process_thin_deferred_cells_count;
process_thin_deferred_cells_j++)
*(volatile typeof(__list_add_prev_0) *)0;
return;
}
} while (list_empty());
}
void process_deferred_bios() {
while (process_deferred_bios_tc)
process_thin_deferred_cells();
}
target datalayout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048"
target triple = "hexagon-unknown-linux"
%struct.list_head = type { ptr }
@sort_cells_cell = external global ptr
@sort_cells_count = external global i32
@process_thin_deferred_cells_cells = external global %struct.list_head
@process_thin_deferred_cells___trans_tmp_15 = external global i32
define i32 @sort_cells(ptr %cells) {
entry:
store ptr null, ptr @sort_cells_cell, align 4
br label %for.cond
for.cond: ; preds = %for.body, %entry
%0 = phi ptr [ %.pre, %for.body ], [ %cells, %entry ]
%cmp.not = icmp eq ptr %0, null
br i1 %cmp.not, label %for.end, label %for.body
for.body: ; preds = %for.cond
%1 = load ptr, ptr %cells, align 4
%2 = load i32, ptr @sort_cells_count, align 4, !tbaa !0
%inc = add i32 %2, 1
store i32 %inc, ptr @sort_cells_count, align 4, !tbaa !0
store i32 0, ptr %1, align 4, !tbaa !4
%.pre = load ptr, ptr %cells, align 4
br label %for.cond
for.end: ; preds = %for.cond
%3 = load i32, ptr @sort_cells_count, align 4
ret i32 %3
}
define void @process_thin_deferred_cells() {
entry:
%agg.tmp.ensured.sroa.0 = alloca ptr, align 4
br label %do.body
do.body: ; preds = %do.body, %entry
%call1 = call i32 @sort_cells(ptr @process_thin_deferred_cells_cells)
%0 = load i32, ptr @process_thin_deferred_cells___trans_tmp_15, align 4
%tobool.not = icmp eq i32 %0, 0
br i1 %tobool.not, label %do.body, label %for.cond
for.cond: ; preds = %for.body, %do.body
%1 = phi i32 [ %inc, %for.body ], [ 0, %do.body ]
%cmp = icmp slt i32 %1, %call1
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
store volatile ptr null, ptr %agg.tmp.ensured.sroa.0, align 4
%inc = add i32 %1, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
define void @process_deferred_bios() {
entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
call void @process_thin_deferred_cells()
br label %while.cond
}
!0 = !{!1, !1, i64 0}
!1 = !{!"int", !2, i64 0}
!2 = !{!"omnipotent char", !3, i64 0}
!3 = !{!"Simple C/C++ TBAA"}
!4 = !{!5, !5, i64 0}
!5 = !{!"p1 _ZTS9list_head", !6, i64 0}
!6 = !{!"any pointer", !2, i64 0}
|
Add additional test coverage for llvm#147824.
…47824) Generalize the code added in llvm#147214 to also support re-using pointer LCSSA phis when expanding SCEVs with AddRecs. A common source of integer AddRecs with pointer bases are runtime checks emitted by LV based on the distance between 2 pointer AddRecs. This improves codegen in some cases when vectorizing and prevents regressions with llvm#142309, which turns some phis into single-entry ones, which SCEV will look through now (and expand the whole AddRec), whereas before it would have to treat the LCSSA phi as SCEVUnknown. Compile-time impact neutral: https://llvm-compile-time-tracker.com/compare.php?from=fd5fc76c91538871771be2c3be2ca3a5f2dcac31&to=ca5fc2b3d8e6efc09f1624a17fdbfbe909f14eb4&stat=instructions:u PR: llvm#147824
Add another test case for llvm#147824, where the difference between an existing phi and the target SCEV is an add of a constant.
If we insert a new add instruction, it may introduce a new use outside the loop that contains the phi node we re-use. Use fixupLCSSAFormFor to fix LCSSA form, if needed. This fixes a crash reported in #147824 (comment).
@nathanchance thanks for the report! Should be fixed in f9f68af by fixing up LCSSA form if needed. |
Add additional test coverage for llvm#147824.
…47824) Generalize the code added in llvm#147214 to also support re-using pointer LCSSA phis when expanding SCEVs with AddRecs. A common source of integer AddRecs with pointer bases are runtime checks emitted by LV based on the distance between 2 pointer AddRecs. This improves codegen in some cases when vectorizing and prevents regressions with llvm#142309, which turns some phis into single-entry ones, which SCEV will look through now (and expand the whole AddRec), whereas before it would have to treat the LCSSA phi as SCEVUnknown. Compile-time impact neutral: https://llvm-compile-time-tracker.com/compare.php?from=fd5fc76c91538871771be2c3be2ca3a5f2dcac31&to=ca5fc2b3d8e6efc09f1624a17fdbfbe909f14eb4&stat=instructions:u PR: llvm#147824
…eeded. If we insert a new add instruction, it may introduce a new use outside the loop that contains the phi node we re-use. Use fixupLCSSAFormFor to fix LCSSA form, if needed. This fixes a crash reported in llvm/llvm-project#147824 (comment).
Update the logic added in llvm#147824 to also allow adds of constants. There are a number of cases where this can help remove redundant phis and replace some computation with a ptrtoint (which likely is free in the backend).
Update the logic added in llvm#147824 to also allow adds of constants. There are a number of cases where this can help remove redundant phis and replace some computation with a ptrtoint (which likely is free in the backend).
…0693) Update the logic added in llvm/llvm-project#147824 to also allow adds of constants. There are a number of cases where this can help remove redundant phis and replace some computation with a ptrtoint (which likely is free in the backend). PR: llvm/llvm-project#150693
Update the logic added in llvm#147824 to also allow adds of constants. There are a number of cases where this can help remove redundant phis and replace some computation with a ptrtoint (which likely is free in the backend). PR: llvm#150693
Generalize the code added in
#147214 to also support
re-using pointer LCSSA phis when expanding integer SCEVs with AddRecs.
A common source of integer AddRecs with pointer bases are runtime checks
emitted by LV based on the distance between 2 pointer AddRecs.
This improves codegen in some cases when vectorizing and prevents
regressions with #142309, which
turns some phis into single-entry ones, which SCEV will look through
now (and expand the whole AddRec), whereas before it would have to treat
the LCSSA phi as SCEVUnknown.
Compile-time impact neutral: https://llvm-compile-time-tracker.com/compare.php?from=fd5fc76c91538871771be2c3be2ca3a5f2dcac31&to=ca5fc2b3d8e6efc09f1624a17fdbfbe909f14eb4&stat=instructions:u