From a4b9f80e85d0fdd0e0e6390eb4548c88aa3fd35e Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Tue, 16 Jul 2024 19:54:11 +0100 Subject: [PATCH 1/6] ConstraintElim: add dry-run routine to fail early Add a dry-run routine that computes a conservative estimate of the number of rows and columns that the transform will require, and fail early if the estimates exceed the upper bounds. This patch has a small overhead, but improves compile-time on one benchmark significantly. The overhead will be compensated for in a follow-up patch, where ConstraintSystem is ported to use a Matrix data structure, performing the full allocation ahead-of-time using these estimates. --- .../Scalar/ConstraintElimination.cpp | 175 +++++++++++++++++- ...x-row-limit.ll => max-row-column-limit.ll} | 17 +- 2 files changed, 174 insertions(+), 18 deletions(-) rename llvm/test/Transforms/ConstraintElimination/{max-row-limit.ll => max-row-column-limit.ll} (81%) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index c31173879af1e..369a6daa4d970 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -40,7 +40,6 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" -#include #include #include @@ -57,6 +56,10 @@ static cl::opt MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden, cl::desc("Maximum number of rows to keep in constraint system")); +static cl::opt MaxColumns( + "constraint-elimination-max-cols", cl::init(50), cl::Hidden, + cl::desc("Maximum number of columns to keep in constraint system")); + static cl::opt DumpReproducers( "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden, cl::desc("Dump IR to reproduce successful transformations.")); @@ -303,6 +306,7 @@ class ConstraintInfo { void popLastNVariables(bool Signed, unsigned N) { getCS(Signed).popLastNVariables(N); } + const DataLayout &getDataLayout() const { return DL; } bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const; @@ -1491,7 +1495,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info, /// Check if either the first condition of an AND or OR is implied by the /// (negated in case of OR) second condition or vice versa. static bool checkOrAndOpImpliedByOther( - FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, + const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule, SmallVectorImpl &ReproducerCondStack, SmallVectorImpl &DFSInStack) { @@ -1671,18 +1675,91 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info, return Changed; } -static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, - ScalarEvolution &SE, - OptimizationRemarkEmitter &ORE) { - bool Changed = false; +/// Performs a dry run of AddFact, computing a conservative estimate of the +/// number of new variables introduced. +static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, + const ConstraintInfo &Info, unsigned &EstimatedRowsA, + unsigned &EstimatedRowsB, + unsigned &EstimatedColumns) { + auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB, + &EstimatedColumns](CmpInst::Predicate Pred, Value *A, + Value *B) { + SmallVector NewVars; + auto R = Info.getConstraint(Pred, A, B, NewVars); + + // We offset it by 1 due to logic in addFact. + unsigned NewEstimate = + count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1; + + EstimatedColumns = std::max(EstimatedColumns, NewEstimate); + if (R.IsSigned) + ++EstimatedRowsA; + else + ++EstimatedRowsB; + }; + + UpdateEstimate(Pred, A, B); + + // What follows is a dry-run of transferToOtherSystem. + auto IsKnownNonNegative = [&Info](Value *V) { + return Info.doesHold(CmpInst::ICMP_SGE, V, + ConstantInt::get(V->getType(), 0)) || + isKnownNonNegative(V, Info.getDataLayout(), + MaxAnalysisRecursionDepth - 1); + }; + + if (!A->getType()->isIntegerTy()) + return; + + switch (Pred) { + default: + break; + case CmpInst::ICMP_ULT: + case CmpInst::ICMP_ULE: + if (IsKnownNonNegative(B)) { + UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); + } + break; + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_UGT: + if (IsKnownNonNegative(A)) { + UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); + } + break; + case CmpInst::ICMP_SLT: + if (IsKnownNonNegative(A)) + UpdateEstimate(CmpInst::ICMP_ULT, A, B); + break; + case CmpInst::ICMP_SGT: + if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1))) + UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0)); + if (IsKnownNonNegative(B)) + UpdateEstimate(CmpInst::ICMP_UGT, A, B); + break; + case CmpInst::ICMP_SGE: + if (IsKnownNonNegative(B)) + UpdateEstimate(CmpInst::ICMP_UGE, A, B); + break; + } +} + +/// Performs a dry run of the transform, computing a conservative estimate of +/// the total number of columns we need in the underlying storage. +static std::tuple +dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { DT.updateDFSNumbers(); SmallVector FunctionArgs; for (Value &Arg : F.args()) FunctionArgs.push_back(&Arg); - ConstraintInfo Info(F.getDataLayout(), FunctionArgs); State S(DT, LI, SE); - std::unique_ptr ReproducerModule( - DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr); + unsigned EstimatedColumns = FunctionArgs.size() + 1; + + // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to + // UnsignedCS. + unsigned EstimatedRowsA = 0, EstimatedRowsB = 1; + ConstraintInfo Info(F.getDataLayout(), FunctionArgs); // First, collect conditions implied by branches and blocks with their // Dominator DFS in and out numbers. @@ -1725,12 +1802,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, return A.NumIn < B.NumIn; }); + for (const FactOrCheck &CB : S.WorkList) { + ICmpInst::Predicate Pred; + Value *A, *B; + if (CB.isCheck()) { + // What follows is a dry-run of checkOrAndOpImpliedByOther, without + // assuming that instructions have been simplified, as they would have + // during the course of normal operation. + auto *ContextInst = CB.getContextInst(); + if (auto *Cmp = + dyn_cast_or_null(CB.getInstructionToSimplify())) { + unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0; + if (match(ContextInst, m_LogicalOp()) && + match(ContextInst->getOperand(OtherOpIdx), + m_ICmp(Pred, m_Value(A), m_Value(B)))) { + if (match(ContextInst, m_LogicalOr())) + Pred = CmpInst::getInversePredicate(Pred); + dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB, + EstimatedColumns); + } + } + continue; + } + if (!CB.isConditionFact()) { + Value *X; + if (match(CB.Inst, m_Intrinsic(m_Value(X)))) { + if (cast(CB.Inst->getOperand(1))->isOne()) + dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, + ConstantInt::get(CB.Inst->getType(), 0), Info, + EstimatedRowsA, EstimatedRowsB, EstimatedColumns); + dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + continue; + } + + if (auto *MinMax = dyn_cast(CB.Inst)) { + Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate()); + dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA, + EstimatedRowsB, EstimatedColumns); + continue; + } + } + + if (CB.isConditionFact()) { + Pred = CB.Cond.Pred; + A = CB.Cond.Op0; + B = CB.Cond.Op1; + } else { + bool Matched = match(CB.Inst, m_Intrinsic( + m_ICmp(Pred, m_Value(A), m_Value(B)))); + (void)Matched; + assert(Matched && "Must have an assume intrinsic with a icmp operand"); + } + dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB, + EstimatedColumns); + } + return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns}; +} + +static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, + ScalarEvolution &SE, + OptimizationRemarkEmitter &ORE) { + bool Changed = false; + const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE); + + // Fail early if estimates exceed limits. Row estimate could be off by up to + // 40%. + if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns) + return false; + + SmallVector FunctionArgs; + for (Value &Arg : F.args()) + FunctionArgs.push_back(&Arg); + ConstraintInfo Info(F.getDataLayout(), FunctionArgs); + std::unique_ptr ReproducerModule( + DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr); + SmallVector ToRemove; // Finally, process ordered worklist and eliminate implied conditions. SmallVector DFSInStack; SmallVector ReproducerCondStack; - for (FactOrCheck &CB : S.WorkList) { + for (const FactOrCheck &CB : S.WorkList) { // First, pop entries from the stack that are out-of-scope for CB. Remove // the corresponding entry from the constraint system. while (!DFSInStack.empty()) { diff --git a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll similarity index 81% rename from llvm/test/Transforms/ConstraintElimination/max-row-limit.ll rename to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll index 0e078109ed663..2f3b62dc5dab7 100644 --- a/llvm/test/Transforms/ConstraintElimination/max-row-limit.ll +++ b/llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll @@ -1,7 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s -; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s -; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s +; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s +; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { @@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { ; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100 ; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]] ; SIMP: bb5: -; SIMP-NEXT: ret i1 true +; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100 +; SIMP-NEXT: ret i1 [[C5]] ; SIMP: exit: ; SIMP-NEXT: ret i1 false ; @@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) { ; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100 ; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]] ; NOSIMP: bb5: -; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100 +; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100 ; NOSIMP-NEXT: ret i1 [[C5]] ; NOSIMP: exit: ; NOSIMP-NEXT: ret i1 false @@ -69,11 +72,9 @@ bb4: br i1 %c4, label %bb5, label %exit bb5: - %c5 = icmp uge i32 %l4, 100 + %c5 = icmp sge i32 %l4, 100 ret i1 %c5 exit: ret i1 false } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; COMMON: {{.*}} From 50c2e95b9a66395d5f6b62a99e67fec1a874f8f5 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sat, 20 Jul 2024 13:32:16 +0100 Subject: [PATCH 2/6] ConstraintElim: attempt to make estimation cheaper --- .../Scalar/ConstraintElimination.cpp | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 369a6daa4d970..2c12c4fafd84a 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1684,30 +1684,47 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB, &EstimatedColumns](CmpInst::Predicate Pred, Value *A, Value *B) { - SmallVector NewVars; - auto R = Info.getConstraint(Pred, A, B, NewVars); + // What follows is a simplified dry-run of Info.getConstraint and addFact. + unsigned NumNewVars = 0; + bool IsSigned = false; + + switch (Pred) { + case CmpInst::ICMP_UGT: + case CmpInst::ICMP_UGE: + case CmpInst::ICMP_NE: + case CmpInst::ICMP_EQ: + break; + case CmpInst::ICMP_SGT: + case CmpInst::ICMP_SGE: + IsSigned = true; + break; + default: + return; + } - // We offset it by 1 due to logic in addFact. - unsigned NewEstimate = - count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1; + SmallVector Preconditions; + auto &Value2Index = Info.getValue2Index(IsSigned); + auto ADec = decompose(A->stripPointerCastsSameRepresentation(), + Preconditions, IsSigned, Info.getDataLayout()); + auto BDec = decompose(B->stripPointerCastsSameRepresentation(), + Preconditions, IsSigned, Info.getDataLayout()); + for (const auto &KV : concat(ADec.Vars, BDec.Vars)) { + if (!Value2Index.contains(KV.Variable)) + ++NumNewVars; + } - EstimatedColumns = std::max(EstimatedColumns, NewEstimate); - if (R.IsSigned) + if (IsSigned) ++EstimatedRowsA; else ++EstimatedRowsB; + + EstimatedColumns = + std::max(EstimatedColumns, NumNewVars + Value2Index.size() + 2); }; UpdateEstimate(Pred, A, B); - // What follows is a dry-run of transferToOtherSystem. - auto IsKnownNonNegative = [&Info](Value *V) { - return Info.doesHold(CmpInst::ICMP_SGE, V, - ConstantInt::get(V->getType(), 0)) || - isKnownNonNegative(V, Info.getDataLayout(), - MaxAnalysisRecursionDepth - 1); - }; - + // What follows is a simplified dry-run of transferToOtherSystem. if (!A->getType()->isIntegerTy()) return; @@ -1716,31 +1733,20 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, break; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - if (IsKnownNonNegative(B)) { - UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)); - UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); - } - break; case CmpInst::ICMP_UGE: case CmpInst::ICMP_UGT: - if (IsKnownNonNegative(A)) { - UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0)); - UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); - } + UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B); break; case CmpInst::ICMP_SLT: - if (IsKnownNonNegative(A)) - UpdateEstimate(CmpInst::ICMP_ULT, A, B); + UpdateEstimate(CmpInst::ICMP_ULT, A, B); break; case CmpInst::ICMP_SGT: - if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1))) - UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0)); - if (IsKnownNonNegative(B)) - UpdateEstimate(CmpInst::ICMP_UGT, A, B); + UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0)); + UpdateEstimate(CmpInst::ICMP_UGT, A, B); break; case CmpInst::ICMP_SGE: - if (IsKnownNonNegative(B)) - UpdateEstimate(CmpInst::ICMP_UGE, A, B); + UpdateEstimate(CmpInst::ICMP_UGE, A, B); break; } } From 11b306fae6c365a152d8fb9b61846ca733ac87c1 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Sun, 21 Jul 2024 14:38:05 +0100 Subject: [PATCH 3/6] ConstraintElim: avoid running decompose multiple times --- .../Scalar/ConstraintElimination.cpp | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 2c12c4fafd84a..334fb8e5d6752 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -31,6 +31,7 @@ #include "llvm/IR/Instructions.h" #include "llvm/IR/Module.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Value.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" @@ -1678,7 +1679,7 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info, /// Performs a dry run of AddFact, computing a conservative estimate of the /// number of new variables introduced. static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, - const ConstraintInfo &Info, unsigned &EstimatedRowsA, + ConstraintInfo &Info, unsigned &EstimatedRowsA, unsigned &EstimatedRowsB, unsigned &EstimatedColumns) { auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB, @@ -1704,12 +1705,24 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, SmallVector Preconditions; auto &Value2Index = Info.getValue2Index(IsSigned); - auto ADec = decompose(A->stripPointerCastsSameRepresentation(), - Preconditions, IsSigned, Info.getDataLayout()); - auto BDec = decompose(B->stripPointerCastsSameRepresentation(), - Preconditions, IsSigned, Info.getDataLayout()); - for (const auto &KV : concat(ADec.Vars, BDec.Vars)) { - if (!Value2Index.contains(KV.Variable)) + Value *AStrip = A->stripPointerCastsSameRepresentation(); + Value *BStrip = B->stripPointerCastsSameRepresentation(); + SmallVector AVars, BVars; + + if (!Value2Index.contains(AStrip)) { + AVars = + decompose(AStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars; + Value2Index.insert({AStrip, Value2Index.size() + 1}); + } + if (!Value2Index.contains(BStrip)) { + BVars = + decompose(BStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars; + Value2Index.insert({BStrip, Value2Index.size() + 1}); + } + + for (const auto &KV : concat(AVars, BVars)) { + if (KV.Variable == AStrip || KV.Variable == BStrip || + !Value2Index.contains(KV.Variable)) ++NumNewVars; } @@ -1718,8 +1731,7 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, else ++EstimatedRowsB; - EstimatedColumns = - std::max(EstimatedColumns, NumNewVars + Value2Index.size() + 2); + EstimatedColumns = std::max(EstimatedColumns, NumNewVars + 2); }; UpdateEstimate(Pred, A, B); @@ -1756,16 +1768,13 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, static std::tuple dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { DT.updateDFSNumbers(); - SmallVector FunctionArgs; - for (Value &Arg : F.args()) - FunctionArgs.push_back(&Arg); State S(DT, LI, SE); - unsigned EstimatedColumns = FunctionArgs.size() + 1; // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to // UnsignedCS. unsigned EstimatedRowsA = 0, EstimatedRowsB = 1; - ConstraintInfo Info(F.getDataLayout(), FunctionArgs); + unsigned EstimatedColumns = 1; + ConstraintInfo Info(F.getDataLayout(), {}); // First, collect conditions implied by branches and blocks with their // Dominator DFS in and out numbers. From f0d674c9f5aa601c0e5472f208ff96f3d855d9b6 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 22 Jul 2024 16:47:42 +0100 Subject: [PATCH 4/6] ConstraintElim: idea for optimizing dry-run further --- .../Scalar/ConstraintElimination.cpp | 56 ++++++++++--------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 334fb8e5d6752..fdd463273edd1 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1682,13 +1682,11 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, ConstraintInfo &Info, unsigned &EstimatedRowsA, unsigned &EstimatedRowsB, unsigned &EstimatedColumns) { + // What follows is a simplified dry-run of Info.getConstraint and addFact. auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB, &EstimatedColumns](CmpInst::Predicate Pred, Value *A, Value *B) { - // What follows is a simplified dry-run of Info.getConstraint and addFact. - unsigned NumNewVars = 0; bool IsSigned = false; - switch (Pred) { case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: @@ -1703,35 +1701,41 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, return; } - SmallVector Preconditions; - auto &Value2Index = Info.getValue2Index(IsSigned); - Value *AStrip = A->stripPointerCastsSameRepresentation(); - Value *BStrip = B->stripPointerCastsSameRepresentation(); - SmallVector AVars, BVars; - - if (!Value2Index.contains(AStrip)) { - AVars = - decompose(AStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars; - Value2Index.insert({AStrip, Value2Index.size() + 1}); - } - if (!Value2Index.contains(BStrip)) { - BVars = - decompose(BStrip, Preconditions, IsSigned, Info.getDataLayout()).Vars; - Value2Index.insert({BStrip, Value2Index.size() + 1}); - } + // We abuse the Value2Index map by storing a map of Value -> size of + // decompose(Value).Vars. We do this to avoid wastefully calling decompose + // on chains we have already seen. + auto &DecompSizes = Info.getValue2Index(IsSigned); + const auto &DL = Info.getDataLayout(); + + auto GetDecompSize = [&DL, &DecompSizes, IsSigned](Value *V) { + if (!DecompSizes.contains(V)) { + SmallVector Preconditions; + auto Vars = decompose(V, Preconditions, IsSigned, DL).Vars; + for (const auto &[Idx, KV] : enumerate(Vars)) + // decompose matches the passed Value against some pattern, and + // recursively calls itself on the matched inner values, merging + // results into the final chain. We conservatively estimate that any + // sub-chain in the decompose chain will have the length of the chain + // minus the index: the results of two sub-chains could be merged, + // leading to a conservative estimate. This is however, not a problem + // in practice, as these conservative estimates are anyway less than + // the the size of the total chain, and std::max on the estimated + // columns will ignore these smaller estiamtes. + DecompSizes.insert({KV.Variable, Vars.size() - Idx}); + DecompSizes.insert({V, Vars.size()}); + } + return DecompSizes.at(V); + }; - for (const auto &KV : concat(AVars, BVars)) { - if (KV.Variable == AStrip || KV.Variable == BStrip || - !Value2Index.contains(KV.Variable)) - ++NumNewVars; - } + unsigned ASize = GetDecompSize(A->stripPointerCastsSameRepresentation()); + unsigned BSize = GetDecompSize(B->stripPointerCastsSameRepresentation()); if (IsSigned) ++EstimatedRowsA; else ++EstimatedRowsB; - EstimatedColumns = std::max(EstimatedColumns, NumNewVars + 2); + EstimatedColumns = std::max(EstimatedColumns, ASize + BSize + 2); }; UpdateEstimate(Pred, A, B); @@ -1764,7 +1768,7 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, } /// Performs a dry run of the transform, computing a conservative estimate of -/// the total number of columns we need in the underlying storage. +/// the total number of rows and columns we need in the underlying storage. static std::tuple dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { DT.updateDFSNumbers(); From 91468794b11071707b53fddec6bc6d757ea9302f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 22 Jul 2024 18:45:07 +0100 Subject: [PATCH 5/6] ConstraintElim: function args have trivial decomp --- .../lib/Transforms/Scalar/ConstraintElimination.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index fdd463273edd1..40d117d49489e 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1704,7 +1704,7 @@ static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B, // We abuse the Value2Index map by storing a map of Value -> size of // decompose(Value).Vars. We do this to avoid wastefully calling decompose // on chains we have already seen. - auto &DecompSizes = Info.getValue2Index(IsSigned); + auto &DecompSizes = Info.getValue2Index(true); const auto &DL = Info.getDataLayout(); auto GetDecompSize = [&DL, &DecompSizes, IsSigned](Value *V) { @@ -1777,9 +1777,18 @@ dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) { // EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to // UnsignedCS. unsigned EstimatedRowsA = 0, EstimatedRowsB = 1; - unsigned EstimatedColumns = 1; + unsigned EstimatedColumns = F.arg_end() - F.arg_begin() + 1; ConstraintInfo Info(F.getDataLayout(), {}); + // We abuse the Value2Index map by storing a map of Value -> size of + // decompose(Value).Vars. We do this to avoid wastefully calling decompose + // on chains we have already seen. + // + // Function arguments don't decompose into anything non-trivial. + auto &DecompSizes = Info.getValue2Index(true); + for (Value &A : F.args()) + DecompSizes.insert({&A, 1}); + // First, collect conditions implied by branches and blocks with their // Dominator DFS in and out numbers. for (BasicBlock &BB : F) { From 0a2091a11cb0dee1ca6af72e6644050e8ef37f70 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 29 Jul 2024 16:43:32 +0100 Subject: [PATCH 6/6] ConstraintElim: try 1.6 factor --- llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 40d117d49489e..d494d75323d41 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1897,8 +1897,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI, const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE); // Fail early if estimates exceed limits. Row estimate could be off by up to - // 40%. - if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns) + // 60%. + if (EstimatedRows > 1.6 * MaxRows || EstimatedColumns > MaxColumns) return false; SmallVector FunctionArgs;