diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index c31173879af1e..d494d75323d41 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -40,7 +41,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include #include #include @@ -57,6 +57,10 @@ static cl::opt MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden, cl::desc("Maximum number of rows to keep in constraint system")); +static cl::opt MaxColumns( + "constraint-elimination-max-cols", cl::init(50), cl::Hidden, + cl::desc("Maximum number of columns to keep in constraint system")); + static cl::opt DumpReproducers( "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden, cl::desc("Dump IR to reproduce successful transformations.")); @@ -303,6 +307,7 @@ class ConstraintInfo { void popLastNVariables(bool Signed, unsigned N) { getCS(Signed).popLastNVariables(N); } + const DataLayout &getDataLayout() const { return DL; } bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const; @@ -1491,7 +1496,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info, /// Check if either the first condition of an AND or OR is implied by the /// (negated in case of OR) second condition or vice versa. static bool checkOrAndOpImpliedByOther( - FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, + const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, SmallVectorImpl &ReproducerCondStack, SmallVectorImpl &DFSInStack) { @@ -1671,18 +1676,118 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info, return Changed; } -static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, - ScalarEvolution &SE, - OptimizationRemarkEmitter &ORE) { - bool Changed = false; +/// Performs a dry run of AddFact, computing a conservative estimate of the +/// number of new variables introduced. +static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, + ConstraintInfo &Info, unsigned &EstimatedRowsA, + unsigned &EstimatedRowsB, + unsigned &EstimatedColumns) { + // What follows is a simplified dry-run of Info.getConstraint and addFact. + auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB, + &EstimatedColumns](CmpInst::Predicate Pred, Value *A, + Value *B) { + bool IsSigned = false; + switch (Pred) { + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_NE: + case CmpInst::ICMP_EQ: + break; + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_SGE: + IsSigned = true; + break; + default: + return; + } + + // We abuse the Value2Index map by storing a map of Value -> size of + // decompose(Value).Vars. We do this to avoid wastefully calling decompose + // on chains we have already seen. + auto &DecompSizes = Info.getValue2Index(true); + const auto &DL = Info.getDataLayout(); + + auto GetDecompSize = [&DL, &DecompSizes, IsSigned](Value *V) { + if (!DecompSizes.contains(V)) { + SmallVector Preconditions; + auto Vars = decompose(V, Preconditions, IsSigned, DL).Vars; + for (const auto &[Idx, KV] : enumerate(Vars)) + // decompose matches the passed Value against some pattern, and + // recursively calls itself on the matched inner values, merging + // results into the final chain. We conservatively estimate that any + // sub-chain in the decompose chain will have the length of the chain + // minus the index: the results of two sub-chains could be merged, + // leading to a conservative estimate. This is however, not a problem + // in practice, as these conservative estimates are anyway less than + // the the size of the total chain, and std::max on the estimated + // columns will ignore these smaller estiamtes. + DecompSizes.insert({KV.Variable, Vars.size() - Idx}); + DecompSizes.insert({V, Vars.size()}); + } + return DecompSizes.at(V); + }; + + unsigned ASize = GetDecompSize(A->stripPointerCastsSameRepresentation()); + unsigned BSize = GetDecompSize(B->stripPointerCastsSameRepresentation()); + + if (IsSigned) + ++EstimatedRowsA; + else + ++EstimatedRowsB; + + EstimatedColumns = std::max(EstimatedColumns, ASize + BSize + 2); + }; + + UpdateEstimate(Pred, A, B); + + // What follows is a simplified dry-run of transferToOtherSystem. + if (!A->getType()->isIntegerTy()) + return; + + switch (Pred) { + default: + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_UGT: + UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); + break; + case CmpInst::ICMP_SLT: + UpdateEstimate(CmpInst::ICMP_ULT, A, B); + break; + case CmpInst::ICMP_SGT: + UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::ICMP_UGT, A, B); + break; + case CmpInst::ICMP_SGE: + UpdateEstimate(CmpInst::ICMP_UGE, A, B); + break; + } +} + +/// Performs a dry run of the transform, computing a conservative estimate of +/// the total number of rows and columns we need in the underlying storage. +static std::tuple +dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { DT.updateDFSNumbers(); - SmallVector FunctionArgs; - for (Value &Arg : F.args()) - FunctionArgs.push_back(&Arg); - ConstraintInfo Info(F.getDataLayout(), FunctionArgs); State S(DT, LI, SE); - std::unique_ptr ReproducerModule( - DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr); + + // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to + // UnsignedCS. + unsigned EstimatedRowsA = 0, EstimatedRowsB = 1; + unsigned EstimatedColumns = F.arg_end() - F.arg_begin() + 1; + ConstraintInfo Info(F.getDataLayout(), {}); + + // We abuse the Value2Index map by storing a map of Value -> size of + // decompose(Value).Vars. We do this to avoid wastefully calling decompose + // on chains we have already seen. + // + // Function arguments don't decompose into anything non-trivial. + auto &DecompSizes = Info.getValue2Index(true); + for (Value &A : F.args()) + DecompSizes.insert({&A, 1}); // First, collect conditions implied by branches and blocks with their // Dominator DFS in and out numbers. @@ -1725,12 +1830,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, return A.NumIn < B.NumIn; }); + for (const FactOrCheck &CB : S.WorkList) { + ICmpInst::Predicate Pred; + Value *A, *B; + if (CB.isCheck()) { + // What follows is a dry-run of checkOrAndOpImpliedByOther, without + // assuming that instructions have been simplified, as they would have + // during the course of normal operation. + auto *ContextInst = CB.getContextInst(); + if (auto *Cmp = + dyn_cast_or_null(CB.getInstructionToSimplify())) { + unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0; + if (match(ContextInst, m_LogicalOp()) && + match(ContextInst->getOperand(OtherOpIdx), + m_ICmp(Pred, m_Value(A), m_Value(B)))) { + if (match(ContextInst, m_LogicalOr())) + Pred = CmpInst::getInversePredicate(Pred); + dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB, + EstimatedColumns); + } + } + continue; + } + if (!CB.isConditionFact()) { + Value *X; + if (match(CB.Inst, m_Intrinsic(m_Value(X)))) { + if (cast(CB.Inst->getOperand(1))->isOne()) + dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, + ConstantInt::get(CB.Inst->getType(), 0), Info, + EstimatedRowsA, EstimatedRowsB, EstimatedColumns); + dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + continue; + } + + if (auto *MinMax = dyn_cast(CB.Inst)) { + Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); + dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + continue; + } + } + + if (CB.isConditionFact()) { + Pred = CB.Cond.Pred; + A = CB.Cond.Op0; + B = CB.Cond.Op1; + } else { + bool Matched = match(CB.Inst, m_Intrinsic( + m_ICmp(Pred, m_Value(A), m_Value(B)))); + (void)Matched; + assert(Matched && "Must have an assume intrinsic with a icmp operand"); + } + dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB, + EstimatedColumns); + } + return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns}; +} + +static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, + ScalarEvolution &SE, + OptimizationRemarkEmitter &ORE) { + bool Changed = false; + const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE); + + // Fail early if estimates exceed limits. Row estimate could be off by up to + // 60%. + if (EstimatedRows > 1.6 * MaxRows || EstimatedColumns > MaxColumns) + return false; + + SmallVector FunctionArgs; + for (Value &Arg : F.args()) + FunctionArgs.push_back(&Arg); + ConstraintInfo Info(F.getDataLayout(), FunctionArgs); + std::unique_ptr ReproducerModule( + DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr); + SmallVector ToRemove; // Finally, process ordered worklist and eliminate implied conditions. SmallVector DFSInStack; SmallVector ReproducerCondStack; - for (FactOrCheck &CB : S.WorkList) { + for (const FactOrCheck &CB : S.WorkList) { // First, pop entries from the stack that are out-of-scope for CB. Remove // the corresponding entry from the constraint system. while (!DFSInStack.empty()) { diff --git a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll similarity index 81% rename from llvm/test/Transforms/ConstraintElimination/max-row-limit.ll rename to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll index 0e078109ed663..2f3b62dc5dab7 100644 --- a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll +++ b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s -; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s -; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s +; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { @@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { ; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100 ; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]] ; SIMP: bb5: -; SIMP-NEXT: ret i1 true +; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100 +; SIMP-NEXT: ret i1 [[C5]] ; SIMP: exit: ; SIMP-NEXT: ret i1 false ; @@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { ; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100 ; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]] ; NOSIMP: bb5: -; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100 +; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100 ; NOSIMP-NEXT: ret i1 [[C5]] ; NOSIMP: exit: ; NOSIMP-NEXT: ret i1 false @@ -69,11 +72,9 @@ bb4: br i1 %c4, label %bb5, label %exit bb5: - %c5 = icmp uge i32 %l4, 100 + %c5 = icmp sge i32 %l4, 100 ret i1 %c5 exit: ret i1 false } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; COMMON: {{.*}}