-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[SimplifyIndVar] ICMP predicate conversion to EQ/NE #144945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 3 commits
836f2d2
4a3233f
ad728d6
09e7244
96eda2e
6dfe0c0
9a3dae9
b2b5838
9b9a224
a1ecebb
9be6ea9
9812774
0971035
faa06b4
4e35257
d4d7ddd
616c380
ef63b9a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,6 +97,7 @@ namespace { | |
| bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand); | ||
| bool makeIVComparisonInvariant(ICmpInst *ICmp, Instruction *IVOperand); | ||
| void eliminateIVComparison(ICmpInst *ICmp, Instruction *IVOperand); | ||
| bool forceEqualityForICmp(ICmpInst *ICmp, Instruction *IVOperand); | ||
| void simplifyIVRemainder(BinaryOperator *Rem, Instruction *IVOperand, | ||
| bool IsSigned); | ||
| void replaceRemWithNumerator(BinaryOperator *Rem); | ||
|
|
@@ -244,6 +245,128 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp, | |
| return true; | ||
| } | ||
|
|
||
| /// Try to change predicate of ICmp to EQ/NE to facilitate better work of OSR. | ||
| /// This can be done only if all possible IV values but one lead to the same | ||
| /// produced comparison result, while the 'chosen one' value gives the opposite | ||
| /// result. | ||
| bool SimplifyIndvar::forceEqualityForICmp(ICmpInst *ICmp, | ||
| Instruction *IVOperand) { | ||
| if (ICmp->isEquality()) { | ||
| // nothing to do | ||
| return false; | ||
| } | ||
|
|
||
| unsigned BoundOperandIdx = IVOperand == ICmp->getOperand(0) ? 1 : 0; | ||
| const SCEV *BoundSCEV = SE->getSCEV(ICmp->getOperand(BoundOperandIdx)); | ||
| const SCEVConstant *BoundC = dyn_cast<SCEVConstant>(BoundSCEV); | ||
| CmpInst::Predicate OrigPredicate = ICmp->getPredicate(); | ||
| CmpInst::Predicate NewPredicate = CmpInst::BAD_ICMP_PREDICATE; | ||
| Type *Ty = IVOperand->getType(); | ||
| APInt NewBoundA; | ||
|
|
||
| if (BoundC) { | ||
| // Try to find the 'chosen one' value basing on predicate type and bound | ||
| const APInt &BoundA = BoundC->getAPInt(); | ||
| ConstantRange ExactCR = | ||
| ConstantRange::makeExactICmpRegion(OrigPredicate, BoundA); | ||
| if (!ExactCR.getEquivalentICmp(NewPredicate, NewBoundA)) { | ||
| NewPredicate = CmpInst::BAD_ICMP_PREDICATE; | ||
| } | ||
| } | ||
|
|
||
| if (!ICmpInst::isEquality(NewPredicate)) { | ||
| const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVOperand)); | ||
| if (!AR) { | ||
| return false; | ||
| } | ||
| const SCEVConstant *IVStart = dyn_cast<SCEVConstant>(AR->getStart()); | ||
| const SCEVConstant *IVStep = | ||
| dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)); | ||
| if (!IVStart || !IVStep || !IVStep->getValue()->getValue()) { | ||
| return false; | ||
| } | ||
|
|
||
| if (BoundC) { | ||
| // Check to see the 'chosen one' value is the IV start value | ||
| bool HasNoWrap = ICmpInst::isSigned(OrigPredicate) | ||
| ? AR->hasNoSignedWrap() | ||
| : AR->hasNoUnsignedWrap(); | ||
| if (HasNoWrap) { | ||
| const DataLayout &DL = ICmp->getParent()->getDataLayout(); | ||
| Constant *SecondIterIV = | ||
| ConstantInt::get(Ty, IVStart->getAPInt() + IVStep->getAPInt()); | ||
| Constant *FirstIterResult = ConstantFoldCompareInstOperands( | ||
| OrigPredicate, IVStart->getValue(), BoundC->getValue(), DL); | ||
| Constant *SecondIterResult = ConstantFoldCompareInstOperands( | ||
| OrigPredicate, SecondIterIV, BoundC->getValue(), DL); | ||
| if (FirstIterResult != SecondIterResult) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is an interesting little bit of logic. If I'm following correctly, you're claiming that for an AR which is nuw/nsw, if the non-equality condition changes between iteration 0/1 that the value of that condition on all iterations 2+ must be either a) the value at iteration 1, or poison? I believe that holds. Though, shouldn't this be a subset of computeExitCountExhaustively? (Ignore the bit about not controlling an exit for the moment.) Do you actually see this case come up much? I don't think the "first iteration is special" case is one we've optimized much for beyond basic peeling.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
yes
In context of forceEqualityForICmp() this can be ICmp not controlling loop exit.
I don't know the statistics, but for example the change in llvm/test/Transforms/IndVarSimplify/AArch64/loop-guards.ll your asked about above is due to this rule (and there a loop-local branch reads the condition). Please note that the motivation behind this change is not to optimize the first iteration but to optimize the loop body itself (as the comparison will be executed on all loop iterations). |
||
| NewBoundA = IVStart->getAPInt(); | ||
| NewPredicate = FirstIterResult->isAllOnesValue() ? CmpInst::ICMP_EQ | ||
| : CmpInst::ICMP_NE; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| if (!ICmpInst::isEquality(NewPredicate)) { | ||
| // Check to see the 'chosen one' value is the very last IV value. | ||
| // To put it differently, check to see if ICmp directly or indirectly | ||
| // defines maximum loop trip count (or simply has aligned behavior by | ||
| // accident). This is different from loop exit condition rewriting as here | ||
| // not only ICmp instructions directly writing to exiting branch are | ||
| // considered. | ||
|
|
||
| // check to see if max trip count and IV parameters are constant | ||
| const SCEVConstant *MaxBackCount = | ||
| dyn_cast<SCEVConstant>(SE->getConstantMaxBackedgeTakenCount(L)); | ||
| if (!MaxBackCount) { | ||
| return false; | ||
| } | ||
|
|
||
| // compute the number of consecutive iterations in which produced | ||
| // predicate value will be the same | ||
| bool ExitIfTrue = false; | ||
| auto EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the docs: "Compute the number of times the backedge of the specified loop will execute if its exit condition were a conditional branch of ExitCond.". Nothing in this code ensures that the condition is branched on by a loop exiting branch. As such, the use here is invalid. The major case which comes up here is when the condition produces poison on some iteration, but is only possible branched on before that iteration. (i.e. the condition is used down some conditional path)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please note that the statement from the docs doesn't describe a pre-condition or a requirement. It actually describes how the interface works: it calculates the number of iterations, assuming a hypothetical situation where a specified condition controls the exit from the loop. Actually the interface works w/ the methmatical abstraction which looks for the first iteration at which the condition evaluation gives a different result. This is exactly what is needed for this particular use case, so to my mind it's correct to utilize this interface here.
If the condition produces poison then it can do so only after the iteration at which hypothetical branch controlled by it exits the loop (as in opposite case SE->computeExitLimitFromCond doesn't return correct value). But this is totally ok as the value returned by this interface is only compared w/ the loop trip count and cases when they aren't equal are dropped (so we shouldn't care about iterations after loop exit). |
||
| const SCEVConstant *SameIterCount = | ||
| dyn_cast<SCEVConstant>(EL.ExactNotTaken); | ||
| if (!SameIterCount || SameIterCount->getValue()->isZero()) { | ||
| ExitIfTrue = !ExitIfTrue; | ||
| EL = SE->computeExitLimitFromCond(L, ICmp, ExitIfTrue, false); | ||
| SameIterCount = dyn_cast<SCEVConstant>(EL.ExactNotTaken); | ||
| } | ||
|
|
||
| if (SameIterCount != MaxBackCount) { | ||
| // ICmp isn't aligned with maximum trip count | ||
| return false; | ||
| } | ||
|
|
||
| unsigned IVBitWidth = IVStep->getAPInt().getBitWidth(); | ||
| unsigned CountBitWidth = SameIterCount->getAPInt().getBitWidth(); | ||
| APInt SameIterCountA = SameIterCount->getAPInt(); | ||
| if (IVBitWidth < CountBitWidth) { | ||
| SameIterCountA = SameIterCountA.trunc(IVBitWidth); | ||
| } else if (IVBitWidth > CountBitWidth) { | ||
| SameIterCountA = SameIterCountA.zext(IVBitWidth); | ||
| } | ||
| NewBoundA = IVStart->getAPInt() + (IVStep->getAPInt() * SameIterCountA); | ||
| NewPredicate = ExitIfTrue ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; | ||
| } | ||
| } | ||
|
|
||
| if (!TTI->isLegalICmpImmediate(NewBoundA.getSExtValue())) { | ||
| return false; | ||
| } | ||
|
|
||
| LLVM_DEBUG(dbgs() << "INDVARS: Force EQ/NE predicate for max trip count: " | ||
| << *ICmp << '\n'); | ||
|
|
||
| assert(Ty->getPrimitiveSizeInBits() == NewBoundA.getBitWidth() && | ||
| "bit widths should be aligned"); | ||
| ICmp->setOperand(BoundOperandIdx, ConstantInt::get(Ty, NewBoundA)); | ||
| ICmp->setPredicate(NewPredicate); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| /// SimplifyIVUsers helper for eliminating useless | ||
| /// comparisons against an induction variable. | ||
| void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, | ||
|
|
@@ -267,33 +390,43 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, | |
| // If the condition is always true or always false in the given context, | ||
| // replace it with a constant value. | ||
| SmallVector<Instruction *, 4> Users; | ||
| bool IsDead = false; | ||
| for (auto *U : ICmp->users()) | ||
| Users.push_back(cast<Instruction>(U)); | ||
| const Instruction *CtxI = findCommonDominator(Users, *DT); | ||
| if (auto Ev = SE->evaluatePredicateAt(Pred, S, X, CtxI)) { | ||
| SE->forgetValue(ICmp); | ||
| ICmp->replaceAllUsesWith(ConstantInt::getBool(ICmp->getContext(), *Ev)); | ||
| DeadInsts.emplace_back(ICmp); | ||
| IsDead = true; | ||
| LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); | ||
| } else if (makeIVComparisonInvariant(ICmp, IVOperand)) { | ||
| // fallthrough to end of function | ||
| } else if (ICmpInst::isSigned(OriginalPred) && | ||
| SE->isKnownNonNegative(S) && SE->isKnownNonNegative(X)) { | ||
| IsDead = true; | ||
| } else { | ||
| // If we were unable to make anything above, all we can is to canonicalize | ||
| // the comparison hoping that it will open the doors for other | ||
| // optimizations. If we find out that we compare two non-negative values, | ||
| // we turn the instruction's predicate to its unsigned version. Note that | ||
| // we cannot rely on Pred here unless we check if we have swapped it. | ||
| assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); | ||
| LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp | ||
| << '\n'); | ||
| ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); | ||
| ICmp->setSameSign(); | ||
| } else | ||
| return; | ||
| // optimizations. | ||
| if (ICmpInst::isSigned(OriginalPred) && SE->isKnownNonNegative(S) && | ||
| SE->isKnownNonNegative(X)) { | ||
| // If we find out that we compare two non-negative values, | ||
| // we turn the instruction's predicate to its unsigned version. Note that | ||
| // we cannot rely on Pred here unless we check if we have swapped it. | ||
| assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); | ||
| LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp | ||
| << '\n'); | ||
| ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred)); | ||
| ICmp->setSameSign(); | ||
| Changed = true; | ||
| } | ||
| if (forceEqualityForICmp(ICmp, IVOperand)) { | ||
| Changed = true; | ||
| } | ||
| } | ||
|
|
||
| ++NumElimCmp; | ||
| Changed = true; | ||
| if (IsDead) { | ||
|
||
| NumElimCmp++; | ||
| Changed = true; | ||
| } | ||
| } | ||
|
|
||
| bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -13,7 +13,7 @@ define i32 @guards_applied_to_add_rec(ptr %dst) { | |
| ; CHECK-NEXT: [[OUTER_IV_0:%.*]] = phi i32 [ 2, %[[ENTRY]] ], [ [[OUTER_IV_0_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] | ||
| ; CHECK-NEXT: [[OUTER_IV_1:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[OUTER_IV_0]], %[[OUTER_LATCH]] ] | ||
| ; CHECK-NEXT: [[SHR28:%.*]] = lshr i32 [[OUTER_IV_1]], 1 | ||
| ; CHECK-NEXT: [[PRE:%.*]] = icmp samesign ult i32 [[OUTER_IV_1]], 2 | ||
| ; CHECK-NEXT: [[PRE:%.*]] = icmp samesign eq i32 [[OUTER_IV_1]], 1 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change is highly suspicious. 1 was previously not a value which evaluated to true, and now is?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here the AR is {1; +; 1}. Before: AR < 2 This looks like a correct transformation |
||
| ; CHECK-NEXT: br i1 [[PRE]], label %[[OUTER_LATCH]], label %[[INNER_PREHEADER:.*]] | ||
| ; CHECK: [[INNER_PREHEADER]]: | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[SHR28]] to i64 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -97,7 +97,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) { | |
| ; CHECK-LABEL: @test2( | ||
| ; CHECK-NEXT: entry: | ||
| ; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[LIMIT:%.*]] to i32 | ||
| ; CHECK-NEXT: br i1 %arg, label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] | ||
| ; CHECK-NEXT: br i1 [[ARG:%.*]], label [[FOR_COND1_PREHEADER_PREHEADER:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]] | ||
| ; CHECK: for.cond1.preheader.us.preheader: | ||
| ; CHECK-NEXT: [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[CONV]], i32 1) | ||
| ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] | ||
|
|
@@ -110,7 +110,7 @@ define void @test2(ptr %a, ptr %b, i8 %limit, i1 %arg) { | |
| ; CHECK-NEXT: br label [[FOR_INC13_US]] | ||
| ; CHECK: for.inc13.us: | ||
| ; CHECK-NEXT: [[INDVARS_IV_NEXT4]] = add nuw nsw i64 [[INDVARS_IV3]], 1 | ||
| ; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT4]], 4 | ||
| ; CHECK-NEXT: [[EXITCOND6:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT4]], 4 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND6]], label [[FOR_COND1_PREHEADER_US]], label [[FOR_END_LOOPEXIT1:%.*]] | ||
| ; CHECK: for.body4.us: | ||
| ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY4_LR_PH_US]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US:%.*]] ] | ||
|
|
@@ -237,8 +237,7 @@ define i32 @test4(i32 %a) { | |
| ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[OR]] to i8 | ||
| ; CHECK-NEXT: [[CALL:%.*]] = call i32 @fn1(i8 signext [[CONV3]]) | ||
| ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i32 [[INDVARS_IV]], -1 | ||
| ; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw i32 [[INDVARS_IV_NEXT]] to i8 | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[TMP0]], -14 | ||
| ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[INDVARS_IV_NEXT]], 242 | ||
| ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] | ||
| ; CHECK: for.end: | ||
| ; CHECK-NEXT: ret i32 0 | ||
|
|
@@ -517,8 +516,8 @@ define i32 @test10(i32 %v) { | |
| ; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVARS_IV]], -1 | ||
| ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], [[SEXT]] | ||
| ; CHECK-NEXT: call void @consume.i1(i1 [[TMP1]]) | ||
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp samesign ne i64 [[INDVARS_IV_NEXT]], 11 | ||
|
||
| ; CHECK-NEXT: call void @consume.i64(i64 [[TMP0]]) | ||
| ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 11 | ||
| ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LEAVE:%.*]] | ||
| ; CHECK: leave: | ||
| ; CHECK-NEXT: ret i32 22 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,72 +1,44 @@ | ||
| # CSIR level Instrumentation Flag | ||
| :csir | ||
| cond.llvm.11253644763537639171 | ||
| # Func Hash: | ||
| 1152921517491748863 | ||
| # Num Counters: | ||
| 1 | ||
| # Counter Values: | ||
| 200000 | ||
|
|
||
| foo | ||
| # Func Hash: | ||
| 1720106746050921044 | ||
| # Num Counters: | ||
| 2 | ||
| # Counter Values: | ||
| 100000 | ||
| 1 | ||
|
|
||
| bar | ||
| # Func Hash: | ||
| 1299757151682747028 | ||
| # Num Counters: | ||
| 2 | ||
| # Counter Values: | ||
| 0 | ||
| 0 | ||
|
|
||
| bar | ||
| # Func Hash: | ||
| 29667547796 | ||
| # Num Counters: | ||
| 2 | ||
| # Counter Values: | ||
| 100000 | ||
| 100000 | ||
|
|
||
| main | ||
| # Func Hash: | ||
| 1152921517491748863 | ||
| 1895182923573755903 | ||
| # Num Counters: | ||
| 1 | ||
| # Counter Values: | ||
| 1 | ||
|
|
||
| main | ||
| cspgo_bar.c;clobber | ||
| # Func Hash: | ||
| 1895182923573755903 | ||
| # Num Counters: | ||
| 1 | ||
| # Counter Values: | ||
| 1 | ||
| 200000 | ||
|
|
||
| cspgo.c:foo | ||
| cspgo_bar.c;cond | ||
| # Func Hash: | ||
| 1720106746050921044 | ||
| 1895182923573755903 | ||
| # Num Counters: | ||
| 4 | ||
| # Counter Values: | ||
| 100000 | ||
| 100000 | ||
| 0 | ||
| 1 | ||
| # Counter Values: | ||
| 200000 | ||
|
|
||
| cspgo_bar.c:cond | ||
| cspgo.c;foo | ||
| # Func Hash: | ||
| 12884901887 | ||
| 2216626667076672412 | ||
| # Num Counters: | ||
| 1 | ||
| 2 | ||
| # Counter Values: | ||
| 200000 | ||
| 100000 | ||
| 1 | ||
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
AddRec could be in a different loop than ICmp. Either a parent, or a sibling.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, fixed