-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LoopPeel] Add new option to peeling loops to convert PHI into IV #121104
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
41fe38a
88272e4
ee26737
cd3fc9c
9b03954
c6f1a3a
0f41b26
b05a507
bfddeff
1b18665
27a46a1
df8f554
cd9b436
9d88973
39e8105
af6ab08
550256d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -77,6 +77,10 @@ static cl::opt<bool> DisableAdvancedPeeling( | |
| cl::desc( | ||
| "Disable advance peeling. Issues for convergent targets (D134803).")); | ||
|
|
||
| static cl::opt<bool> | ||
| EnablePeelingForIV("enable-peeling-for-iv", cl::init(false), cl::Hidden, | ||
| cl::desc("Enable peeling to make a PHI into an IV")); | ||
|
|
||
| static const char *PeeledCountMetaData = "llvm.loop.peeled.count"; | ||
|
|
||
| // Check whether we are capable of peeling this loop. | ||
|
|
@@ -151,45 +155,171 @@ namespace { | |
| // corresponding calls to g are determined and the code for computing | ||
| // x, y, and a can be removed. | ||
| // | ||
| // Similarly, there are cases where peeling makes Phi nodes loop-inductions | ||
| // (i.e., the value is increased or decreased by a fixed amount on every | ||
| // iteration). For example, consider the following function. | ||
| // | ||
| // #define N 100 | ||
| // void f(int a[], int b[]) { | ||
| // int im = N - 1; | ||
| // for (int i = 0; i < N; i++) { | ||
| // a[i] = b[i] + b[im]; | ||
| // im = i; | ||
| // } | ||
| // } | ||
| // | ||
| // The IR of the loop will look something like the following. | ||
| // | ||
| // %i = phi i32 [ 0, %entry ], [ %i.next, %for.body ] | ||
| // %im = phi i32 [ 99, %entry ], [ %i, %for.body ] | ||
| // ... | ||
| // %i.next = add nuw nsw i32 %i, 1 | ||
| // ... | ||
| // | ||
| // In this case, %im becomes a loop-induction variable after peeling 1 | ||
| // iteration, because %i is itself a loop-induction variable. The peel count | ||
| // can be determined by the same algorithm as in the loop-invariant case. Such | ||
| // peeling is profitable for loop vectorization. | ||
| // | ||
| // The PhiAnalyzer class calculates how many times a loop should be | ||
| // peeled based on the above analysis of the phi nodes in the loop while | ||
| // respecting the maximum specified. | ||
| class PhiAnalyzer { | ||
| public: | ||
| PhiAnalyzer(const Loop &L, unsigned MaxIterations); | ||
| PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV); | ||
|
|
||
| // Calculate the sufficient minimum number of iterations of the loop to peel | ||
| // such that phi instructions become determined (subject to allowable limits) | ||
| std::optional<unsigned> calculateIterationsToPeel(); | ||
|
|
||
| protected: | ||
| using PeelCounter = std::optional<unsigned>; | ||
| enum class PeelCounterType { | ||
| Invariant, | ||
| Induction, | ||
| }; | ||
|
|
||
| using PeelCounterValue = std::pair<unsigned, PeelCounterType>; | ||
| using PeelCounter = std::optional<PeelCounterValue>; | ||
| const PeelCounter Unknown = std::nullopt; | ||
|
|
||
| // Add 1 respecting Unknown and return Unknown if result over MaxIterations | ||
| PeelCounter addOne(PeelCounter PC) const { | ||
| if (PC == Unknown) | ||
| return Unknown; | ||
| return (*PC + 1 <= MaxIterations) ? PeelCounter{*PC + 1} : Unknown; | ||
| auto [Val, Ty] = *PC; | ||
| return (Val + 1 <= MaxIterations) ? PeelCounter({Val + 1, Ty}) : Unknown; | ||
| } | ||
|
|
||
| // Return a value representing zero for the given counter type. | ||
| PeelCounter makeZero(PeelCounterType Ty) const { | ||
| return PeelCounter({0, Ty}); | ||
| } | ||
|
|
||
| // Calculate the number of iterations after which the given value | ||
| // becomes an invariant. | ||
| // Calculate the number of iterations after which the given value becomes an | ||
| // invariant or an induction. | ||
| PeelCounter calculate(const Value &); | ||
|
|
||
| // Auxiliary function to calculate the number of iterations for a comparison | ||
| // instruction or a binary operator. | ||
| PeelCounter mergeTwoCounter(const Instruction &CmpOrBinaryOp, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: mergeTwoCounters?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. I addressed these comments on PR #155221 . |
||
| const PeelCounterValue &LHS, | ||
| const PeelCounterValue &RHS) const; | ||
|
|
||
| // Returns true if \p Phi is an induction in the target loop. This is a | ||
| // lightweight check that can detect an IV only in some cases. | ||
| bool isInductionPHI(const PHINode *Phi) const; | ||
|
|
||
| const Loop &L; | ||
| const unsigned MaxIterations; | ||
| const bool PeelForIV; | ||
|
|
||
| // Map of Values to number of iterations to invariance | ||
| SmallDenseMap<const Value *, PeelCounter> IterationsToInvariance; | ||
| // Map of Values to number of iterations to invariance or induction | ||
| SmallDenseMap<const Value *, PeelCounter> IterationsToInvarianceOrInduction; | ||
| }; | ||
|
|
||
| PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations) | ||
| : L(L), MaxIterations(MaxIterations) { | ||
| PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations, bool PeelForIV) | ||
| : L(L), MaxIterations(MaxIterations), PeelForIV(PeelForIV) { | ||
| assert(canPeel(&L) && "loop is not suitable for peeling"); | ||
| assert(MaxIterations > 0 && "no peeling is allowed?"); | ||
| } | ||
|
|
||
| /// Test whether \p Phi is an induction variable. SCEV could answer this, but | ||
| /// computing it here is expensive. Instead we perform a cheaper check that | ||
| /// misses complex inductions but is sufficient for some cases. | ||
| bool PhiAnalyzer::isInductionPHI(const PHINode *Phi) const { | ||
| // Currently we only support loops that have a single latch. | ||
| BasicBlock *Latch = L.getLoopLatch(); | ||
| if (Latch == nullptr) | ||
| return false; | ||
|
|
||
| Value *Cur = Phi->getIncomingValueForBlock(Latch); | ||
| SmallPtrSet<Value *, 4> Visited; | ||
| bool VisitBinOp = false; | ||
|
|
||
| // Start at the value coming into the phi from the latch and follow operand 0 | ||
| // of each definition. We consider the phi to be an IV if we get back to it | ||
| // by traversing only casts and add/sub instructions with a ConstantInt RHS. | ||
| while (true) { | ||
| if (Cur == Phi) | ||
| break; | ||
|
|
||
| // Bail out on a value we have already visited to avoid an infinite loop. | ||
| if (Visited.contains(Cur)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use |
||
| return false; | ||
|
|
||
| auto *I = dyn_cast<Instruction>(Cur); | ||
| if (!I || !L.contains(I)) | ||
| return false; | ||
|
|
||
| Visited.insert(Cur); | ||
|
|
||
| if (auto *Cast = dyn_cast<CastInst>(I)) { | ||
| Cur = Cast->getOperand(0); | ||
| } else if (auto *BinOp = dyn_cast<BinaryOperator>(I)) { | ||
| if (BinOp->getOpcode() != Instruction::Add && | ||
| BinOp->getOpcode() != Instruction::Sub) | ||
| return false; | ||
| if (!isa<ConstantInt>(BinOp->getOperand(1))) | ||
| return false; | ||
|
|
||
| VisitBinOp = true; | ||
| Cur = BinOp->getOperand(0); | ||
| } else { | ||
| return false; | ||
| } | ||
| } | ||
|
|
||
| // If the cycle consists only of cast instructions, the phi is not an IV. | ||
| // Return false in this case. | ||
| return VisitBinOp; | ||
| } | ||
|
|
||
| PhiAnalyzer::PeelCounter | ||
| PhiAnalyzer::mergeTwoCounter(const Instruction &CmpOrBinaryOp, | ||
| const PeelCounterValue &LHS, | ||
| const PeelCounterValue &RHS) const { | ||
| auto &[LVal, LTy] = LHS; | ||
| auto &[RVal, RTy] = RHS; | ||
| unsigned NewVal = std::max(LVal, RVal); | ||
|
|
||
| // Merge the counters of the two operands: the merged count is the larger of | ||
| // the two. If either operand is an induction, the result is an induction | ||
| // only when this instruction is an addition or a subtraction (after peeling | ||
| // enough times); any other instruction, including a comparison, may yield a | ||
| // value that is neither an invariant nor an induction, so return Unknown. | ||
| // | ||
| // If both operands are invariants, the result is also an invariant. | ||
| if (LTy == PeelCounterType::Induction || RTy == PeelCounterType::Induction) { | ||
| if (const auto *BinOp = dyn_cast<BinaryOperator>(&CmpOrBinaryOp)) { | ||
| if (BinOp->getOpcode() == Instruction::Add || | ||
| BinOp->getOpcode() == Instruction::Sub) | ||
| return PeelCounter({NewVal, PeelCounterType::Induction}); | ||
| } | ||
| return Unknown; | ||
| } | ||
| return PeelCounter({NewVal, PeelCounterType::Invariant}); | ||
| } | ||
|
|
||
| // This function calculates the number of iterations after which the value | ||
| // becomes an invariant or an induction. Already-computed values are memoized | ||
| // in a map. N.B. This number will be Unknown or <= MaxIterations. | ||
|
|
@@ -208,25 +338,34 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { | |
| // If we already know the answer, take it from the map. | ||
| // Otherwise, place Unknown to map to avoid infinite recursion. Such | ||
| // cycles can never stop on an invariant. | ||
| auto [I, Inserted] = IterationsToInvariance.try_emplace(&V, Unknown); | ||
| auto [I, Inserted] = | ||
| IterationsToInvarianceOrInduction.try_emplace(&V, Unknown); | ||
| if (!Inserted) | ||
| return I->second; | ||
|
|
||
| if (L.isLoopInvariant(&V)) | ||
| // Loop invariant so known at start. | ||
| return (IterationsToInvariance[&V] = 0); | ||
| return (IterationsToInvarianceOrInduction[&V] = | ||
| makeZero(PeelCounterType::Invariant)); | ||
| if (const PHINode *Phi = dyn_cast<PHINode>(&V)) { | ||
| if (Phi->getParent() != L.getHeader()) { | ||
| // Phi is not in header block so Unknown. | ||
| assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved"); | ||
| assert(IterationsToInvarianceOrInduction[&V] == Unknown && | ||
| "unexpected value saved"); | ||
| return Unknown; | ||
| } | ||
|
|
||
| // If Phi is an induction, register it as a starting point. | ||
| if (PeelForIV && isInductionPHI(Phi)) | ||
| return (IterationsToInvarianceOrInduction[&V] = | ||
| makeZero(PeelCounterType::Induction)); | ||
|
|
||
| // We need to analyze the input from the back edge and add 1. | ||
| Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch()); | ||
| PeelCounter Iterations = calculate(*Input); | ||
| assert(IterationsToInvariance[Input] == Iterations && | ||
| assert(IterationsToInvarianceOrInduction[Input] == Iterations && | ||
| "unexpected value saved"); | ||
| return (IterationsToInvariance[Phi] = addOne(Iterations)); | ||
| return (IterationsToInvarianceOrInduction[Phi] = addOne(Iterations)); | ||
| } | ||
| if (const Instruction *I = dyn_cast<Instruction>(&V)) { | ||
| if (isa<CmpInst>(I) || I->isBinaryOp()) { | ||
|
|
@@ -237,26 +376,30 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { | |
| PeelCounter RHS = calculate(*I->getOperand(1)); | ||
| if (RHS == Unknown) | ||
| return Unknown; | ||
| return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)}); | ||
| return (IterationsToInvarianceOrInduction[I] = | ||
| mergeTwoCounter(*I, *LHS, *RHS)); | ||
| } | ||
| if (I->isCast()) | ||
| // Cast instructions get the value of the operand. | ||
| return (IterationsToInvariance[I] = calculate(*I->getOperand(0))); | ||
| return (IterationsToInvarianceOrInduction[I] = | ||
| calculate(*I->getOperand(0))); | ||
| } | ||
| // TODO: handle more expressions | ||
|
|
||
| // Everything else is Unknown. | ||
| assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved"); | ||
| assert(IterationsToInvarianceOrInduction[&V] == Unknown && | ||
| "unexpected value saved"); | ||
| return Unknown; | ||
| } | ||
|
|
||
| std::optional<unsigned> PhiAnalyzer::calculateIterationsToPeel() { | ||
| unsigned Iterations = 0; | ||
| for (auto &PHI : L.getHeader()->phis()) { | ||
| PeelCounter ToInvariance = calculate(PHI); | ||
| if (ToInvariance != Unknown) { | ||
| assert(*ToInvariance <= MaxIterations && "bad result in phi analysis"); | ||
| Iterations = std::max(Iterations, *ToInvariance); | ||
| PeelCounter ToInvarianceOrInduction = calculate(PHI); | ||
| if (ToInvarianceOrInduction != Unknown) { | ||
| unsigned Val = ToInvarianceOrInduction->first; | ||
| assert(Val <= MaxIterations && "bad result in phi analysis"); | ||
| Iterations = std::max(Iterations, Val); | ||
| if (Iterations == MaxIterations) | ||
| break; | ||
| } | ||
|
|
@@ -585,14 +728,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, | |
| // in TTI.getPeelingPreferences or by the flag -unroll-peel-count. | ||
| unsigned DesiredPeelCount = TargetPeelCount; | ||
|
|
||
| // Here we try to get rid of Phis which become invariants after 1, 2, ..., N | ||
| // iterations of the loop. For this we compute the number for iterations after | ||
| // which every Phi is guaranteed to become an invariant, and try to peel the | ||
| // maximum number of iterations among these values, thus turning all those | ||
| // Phis into invariants. | ||
| // Here we try to get rid of Phis which become invariants or inductions after | ||
| // 1, 2, ..., N iterations of the loop. For this we compute the number of | ||
| // iterations after which every Phi is guaranteed to become an invariant or an | ||
| // induction, and try to peel the maximum number of iterations among these | ||
| // values, thus turning all those Phis into invariants or inductions. | ||
| if (MaxPeelCount > DesiredPeelCount) { | ||
| // Check how many iterations are useful for resolving Phis | ||
| auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel(); | ||
| // Check how many iterations are useful for resolving Phis. | ||
| // TODO: Compute `PeelForIV` with some heuristic. Peeling a loop to make a | ||
| // PHI into an IV is usually good for loop vectorization, so we should | ||
| // perform such peelings if the loop body is vectorizable (e.g., doesn't | ||
| // contain function calls). | ||
| auto NumPeels = PhiAnalyzer(*L, MaxPeelCount, EnablePeelingForIV) | ||
| .calculateIterationsToPeel(); | ||
| if (NumPeels) | ||
| DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels); | ||
| } | ||
|
|
@@ -610,7 +758,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, | |
| if (DesiredPeelCount + AlreadyPeeled <= UnrollPeelMaxCount) { | ||
| LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount | ||
| << " iteration(s) to turn" | ||
| << " some Phis into invariants.\n"); | ||
| << " some Phis into invariants or inductions.\n"); | ||
| PP.PeelCount = DesiredPeelCount; | ||
| PP.PeelProfiledIterations = false; | ||
| return; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Somewhat a pre-existing issue, but these comments should use
///.