diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index e2516930d251b..424521de635d9 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopIterator.h" @@ -160,7 +161,7 @@ class PhiAnalyzer { // Calculate the sufficient minimum number of iterations of the loop to peel // such that phi instructions become determined (subject to allowable limits) - std::optional calculateIterationsToPeel(); + std::optional calculateIterationsToPeel(ScalarEvolution &SE); protected: using PeelCounter = std::optional; @@ -175,7 +176,7 @@ class PhiAnalyzer { // Calculate the number of iterations after which the given value // becomes an invariant. - PeelCounter calculate(const Value &); + PeelCounter calculate(Value &, ScalarEvolution &SE); const Loop &L; const unsigned MaxIterations; @@ -204,7 +205,7 @@ PhiAnalyzer::PhiAnalyzer(const Loop &L, unsigned MaxIterations) // %y = phi(0, 5) // %a = %y + 1 // G(%y) = Unknown otherwise (including phi not in header block) -PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { +PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(Value &V, ScalarEvolution &SE) { // If we already know the answer, take it from the map. auto I = IterationsToInvariance.find(&V); if (I != IterationsToInvariance.end()) @@ -217,15 +218,30 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { if (L.isLoopInvariant(&V)) // Loop invariant so known at start. return (IterationsToInvariance[&V] = 0); - if (const PHINode *Phi = dyn_cast(&V)) { + if (PHINode *Phi = dyn_cast(&V)) { if (Phi->getParent() != L.getHeader()) { // Phi is not in header block so Unknown. 
assert(IterationsToInvariance[&V] == Unknown && "unexpected value saved"); return Unknown; } + + // If Induction PHI, register as a starting point. + // For patterns that include induction variables in Phi's chain. + // The example looks like: + // for.body: + // %i = phi(0, %inc) + // %x = phi(0, %y) + // %a = phi(0, %add) + // %y = phi(0, %a) + // %add = %i + 2 + // %inc = %i + 1 + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(Phi, &L, &SE, ID)) + return (IterationsToInvariance[&V] = 0); + // We need to analyze the input from the back edge and add 1. Value *Input = Phi->getIncomingValueForBlock(L.getLoopLatch()); - PeelCounter Iterations = calculate(*Input); + PeelCounter Iterations = calculate(*Input, SE); assert(IterationsToInvariance[Input] == Iterations && "unexpected value saved"); return (IterationsToInvariance[Phi] = addOne(Iterations)); @@ -233,17 +249,17 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { if (const Instruction *I = dyn_cast(&V)) { if (isa(I) || I->isBinaryOp()) { // Binary instructions get the max of the operands. - PeelCounter LHS = calculate(*I->getOperand(0)); + PeelCounter LHS = calculate(*I->getOperand(0), SE); if (LHS == Unknown) return Unknown; - PeelCounter RHS = calculate(*I->getOperand(1)); + PeelCounter RHS = calculate(*I->getOperand(1), SE); if (RHS == Unknown) return Unknown; return (IterationsToInvariance[I] = {std::max(*LHS, *RHS)}); } if (I->isCast()) // Cast instructions get the value of the operand. 
- return (IterationsToInvariance[I] = calculate(*I->getOperand(0))); + return (IterationsToInvariance[I] = calculate(*I->getOperand(0), SE)); } // TODO: handle more expressions @@ -252,10 +268,11 @@ PhiAnalyzer::PeelCounter PhiAnalyzer::calculate(const Value &V) { return Unknown; } -std::optional PhiAnalyzer::calculateIterationsToPeel() { +std::optional +PhiAnalyzer::calculateIterationsToPeel(ScalarEvolution &SE) { unsigned Iterations = 0; for (auto &PHI : L.getHeader()->phis()) { - PeelCounter ToInvariance = calculate(PHI); + PeelCounter ToInvariance = calculate(PHI, SE); if (ToInvariance != Unknown) { assert(*ToInvariance <= MaxIterations && "bad result in phi analysis"); Iterations = std::max(Iterations, *ToInvariance); @@ -594,7 +611,7 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize, // Phis into invariants. if (MaxPeelCount > DesiredPeelCount) { // Check how many iterations are useful for resolving Phis - auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel(); + auto NumPeels = PhiAnalyzer(*L, MaxPeelCount).calculateIterationsToPeel(SE); if (NumPeels) DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels); } diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll index e24eeef52de4e..3800c48954a6e 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-phi-analysis.ll @@ -197,3 +197,101 @@ for.body: %exitcond = icmp eq i32 %inc, 100000 br i1 %exitcond, label %for.cond.cleanup, label %for.body } + +; Check that phi analysis can handle a binary operator with induction variable. +define void @_Z6binaryv_induction() { +; The phis become invariant through the chain of phis, with a unary +; instruction on a loop invariant. Check that the phis for x, a, and y +; are removed since x is based on y, which is based on a, which is based +; on a binary add of a phi and a constant. 
+; Consider the calls to g: +; First iteration: g(0), x=0, g(0), y=0, a=2 +; Second iteration: g(0), x=0, g(2), y=2, a=3 (binary operator) +; Third iteration: g(0), x=2, g(3), y=3, a=4 +; Fourth iteration (and subsequent): g(2), x=3, g(4), y=4, a=5 +; Therefore, after peeling 3 times x, y, and a all advance in step with i. +; +; void g(int); +; void binary() { +; int x = 0; +; int y = 0; +; int a = 0; +; for(int i = 0; i <100000; ++i) { +; g(x); +; x = y; +; g(a); +; y = a; +; a = i + 2; +; } +; } +; CHECK-LABEL: @_Z6binaryv_induction( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_BEGIN:%.*]] +; CHECK: for.body.peel.begin: +; CHECK-NEXT: br label [[FOR_BODY_PEEL:%.*]] +; CHECK: for.body.peel: +; CHECK-NEXT: tail call void @_Z1gi(i32 signext 0) +; CHECK-NEXT: tail call void @_Z1gi(i32 signext 0) +; CHECK-NEXT: [[ADD_PEEL:%.*]] = add nuw nsw i32 0, 2 +; CHECK-NEXT: [[INC_PEEL:%.*]] = add nuw nsw i32 0, 1 +; CHECK-NEXT: [[EXITCOND_PEEL:%.*]] = icmp ne i32 [[INC_PEEL]], 100000 +; CHECK-NEXT: br i1 [[EXITCOND_PEEL]], label [[FOR_BODY_PEEL_NEXT:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.peel.next: +; CHECK-NEXT: br label [[FOR_BODY_PEEL2:%.*]] +; CHECK: for.body.peel2: +; CHECK-NEXT: tail call void @_Z1gi(i32 signext 0) +; CHECK-NEXT: tail call void @_Z1gi(i32 signext [[ADD_PEEL]]) +; CHECK-NEXT: [[ADD_PEEL3:%.*]] = add nuw nsw i32 [[INC_PEEL]], 2 +; CHECK-NEXT: [[INC_PEEL4:%.*]] = add nuw nsw i32 [[INC_PEEL]], 1 +; CHECK-NEXT: [[EXITCOND_PEEL5:%.*]] = icmp ne i32 [[INC_PEEL4]], 100000 +; CHECK-NEXT: br i1 [[EXITCOND_PEEL5]], label [[FOR_BODY_PEEL_NEXT1:%.*]], label [[FOR_COND_CLEANUP]] +; CHECK: for.body.peel.next1: +; CHECK-NEXT: br label [[FOR_BODY_PEEL7:%.*]] +; CHECK: for.body.peel7: +; CHECK-NEXT: tail call void @_Z1gi(i32 signext 0) +; CHECK-NEXT: tail call void @_Z1gi(i32 signext [[ADD_PEEL3]]) +; CHECK-NEXT: [[ADD_PEEL8:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 2 +; CHECK-NEXT: [[INC_PEEL9:%.*]] = add nuw nsw i32 [[INC_PEEL4]], 1 +; CHECK-NEXT: 
[[EXITCOND_PEEL10:%.*]] = icmp ne i32 [[INC_PEEL9]], 100000 +; CHECK-NEXT: br i1 [[EXITCOND_PEEL10]], label [[FOR_BODY_PEEL_NEXT6:%.*]], label [[FOR_COND_CLEANUP]] +; CHECK: for.body.peel.next6: +; CHECK-NEXT: br label [[FOR_BODY_PEEL_NEXT11:%.*]] +; CHECK: for.body.peel.next11: +; CHECK-NEXT: br label [[ENTRY_PEEL_NEWPH:%.*]] +; CHECK: entry.peel.newph: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.body: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[INC_PEEL9]], [[ENTRY_PEEL_NEWPH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[X:%.*]] = phi i32 [ [[ADD_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[Y:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[ADD_PEEL8]], [[ENTRY_PEEL_NEWPH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[Y]] = phi i32 [ [[ADD_PEEL3]], [[ENTRY_PEEL_NEWPH]] ], [ [[A]], [[FOR_BODY]] ] +; CHECK-NEXT: tail call void @_Z1gi(i32 signext [[X]]) +; CHECK-NEXT: tail call void @_Z1gi(i32 signext [[A]]) +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[I]], 2 +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 100000 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]] +; +entry: + br label %for.body + +for.cond.cleanup: + ret void + +for.body: + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %x = phi i32 [ 0, %entry ], [ %y, %for.body ] + %a = phi i32 [ 0, %entry ], [ %add, %for.body ] + %y = phi i32 [ 0, %entry ], [ %a, %for.body ] + tail call void @_Z1gi(i32 signext %x) + tail call void @_Z1gi(i32 signext %a) + %add = add nuw nsw i32 %i, 2 + %inc = add nuw nsw i32 %i, 1 + %exitcond = icmp ne i32 %inc, 100000 + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} +