Skip to content

Commit 47c8510

Browse files
committed
ConstraintElim: add dry-run routine to fail early
Add a dry-run routine that computes a conservative estimate of the number of rows and columns that the transform will require, and fail early if the estimates exceed the upper bounds. This patch has a small overhead, but improves compile-time on one benchmark significantly. The overhead will be compensated for in a follow-up patch, where ConstraintSystem is ported to use a Matrix data structure, performing the full allocation ahead-of-time using these estimates.
1 parent 76e2307 commit 47c8510

File tree

2 files changed

+174
-18
lines changed

2 files changed

+174
-18
lines changed

llvm/lib/Transforms/Scalar/ConstraintElimination.cpp

Lines changed: 165 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
#include "llvm/Transforms/Utils/Cloning.h"
4141
#include "llvm/Transforms/Utils/ValueMapper.h"
4242

43-
#include <cmath>
4443
#include <optional>
4544
#include <string>
4645

@@ -57,6 +56,10 @@ static cl::opt<unsigned>
5756
MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
5857
cl::desc("Maximum number of rows to keep in constraint system"));
5958

59+
// Column limit for the constraint system. eliminateConstraints returns early,
// without transforming anything, when the dry-run column estimate exceeds it.
static cl::opt<unsigned> MaxColumns(
60+
"constraint-elimination-max-cols", cl::init(50), cl::Hidden,
61+
cl::desc("Maximum number of columns to keep in constraint system"));
62+
6063
static cl::opt<bool> DumpReproducers(
6164
"constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
6265
cl::desc("Dump IR to reproduce successful transformations."));
@@ -303,6 +306,7 @@ class ConstraintInfo {
303306
void popLastNVariables(bool Signed, unsigned N) {
304307
getCS(Signed).popLastNVariables(N);
305308
}
309+
/// Read-only access to the DL member of this ConstraintInfo.
const DataLayout &getDataLayout() const { return DL; }
306310

307311
bool doesHold(CmpInst::Predicate Pred, Value *A, Value *B) const;
308312

@@ -1491,7 +1495,7 @@ removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
14911495
/// Check if either the first condition of an AND or OR is implied by the
14921496
/// (negated in case of OR) second condition or vice versa.
14931497
static bool checkOrAndOpImpliedByOther(
1494-
FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
1498+
const FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
14951499
SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
14961500
SmallVectorImpl<StackEntry> &DFSInStack) {
14971501

@@ -1671,18 +1675,91 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
16711675
return Changed;
16721676
}
16731677

1674-
static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
1675-
ScalarEvolution &SE,
1676-
OptimizationRemarkEmitter &ORE) {
1677-
bool Changed = false;
1678+
/// Performs a dry run of addFact for the fact `A Pred B`: instead of adding
1679+
/// anything, it only updates the running row and column estimates.
///
/// \p Info is taken by const reference and is only queried (getConstraint,
/// doesHold, getDataLayout).
///
/// \param EstimatedRowsA incremented once for every counted constraint whose
///        IsSigned flag is set.
/// \param EstimatedRowsB incremented once for every other counted constraint.
/// \param EstimatedColumns raised to the maximum of its current value and the
///        number of non-zero coefficients of a counted constraint plus one.
1680+
static void dryRunAddFact(CmpInst::Predicate Pred, Value *A, Value *B,
1681+
const ConstraintInfo &Info, unsigned &EstimatedRowsA,
1682+
unsigned &EstimatedRowsB,
1683+
unsigned &EstimatedColumns) {
1684+
// Accounts for a single constraint `A Pred B`: bumps one of the two row
// counters and, if needed, raises the column estimate.
auto UpdateEstimate = [&Info, &EstimatedRowsA, &EstimatedRowsB,
1685+
&EstimatedColumns](CmpInst::Predicate Pred, Value *A,
1686+
Value *B) {
1687+
// NewVars is only passed as getConstraint's output argument; its contents
// are not read afterwards.
SmallVector<Value *> NewVars;
1688+
auto R = Info.getConstraint(Pred, A, B, NewVars);
1689+
1690+
// We offset it by 1 due to logic in addFact.
1691+
unsigned NewEstimate =
1692+
count_if(R.Coefficients, [](int64_t C) { return C != 0; }) + 1;
1693+
1694+
EstimatedColumns = std::max(EstimatedColumns, NewEstimate);
1695+
// Signed constraints are tallied in EstimatedRowsA, all others in
// EstimatedRowsB.
if (R.IsSigned)
1696+
++EstimatedRowsA;
1697+
else
1698+
++EstimatedRowsB;
1699+
};
1700+
1701+
// Count the fact itself.
UpdateEstimate(Pred, A, B);
1702+
1703+
// What follows is a dry-run of transferToOtherSystem.
1704+
// True if `V >= 0` (signed) already holds in Info, or if the free function
// isKnownNonNegative proves it using Info's DataLayout.
auto IsKnownNonNegative = [&Info](Value *V) {
1705+
return Info.doesHold(CmpInst::ICMP_SGE, V,
1706+
ConstantInt::get(V->getType(), 0)) ||
1707+
isKnownNonNegative(V, Info.getDataLayout(),
1708+
MaxAnalysisRecursionDepth - 1);
1709+
};
1710+
1711+
// The extra facts below are only counted for integer-typed operands.
if (!A->getType()->isIntegerTy())
1712+
return;
1713+
1714+
switch (Pred) {
1715+
default:
1716+
break;
1717+
case CmpInst::ICMP_ULT:
1718+
case CmpInst::ICMP_ULE:
1719+
// Unsigned A < / <= B with B known non-negative: count `A >=s 0` and the
// signed form of the same comparison.
if (IsKnownNonNegative(B)) {
1720+
UpdateEstimate(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0));
1721+
UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
1722+
}
1723+
break;
1724+
case CmpInst::ICMP_UGE:
1725+
case CmpInst::ICMP_UGT:
1726+
// Unsigned A >= / > B with A known non-negative: count `B >=s 0` and the
// signed form of the same comparison.
if (IsKnownNonNegative(A)) {
1727+
UpdateEstimate(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0));
1728+
UpdateEstimate(CmpInst::getSignedPredicate(Pred), A, B);
1729+
}
1730+
break;
1731+
case CmpInst::ICMP_SLT:
1732+
// Signed A < B with A known non-negative: count the unsigned `A <u B`.
if (IsKnownNonNegative(A))
1733+
UpdateEstimate(CmpInst::ICMP_ULT, A, B);
1734+
break;
1735+
case CmpInst::ICMP_SGT:
1736+
// Signed A > B: if `B >=s -1` holds in Info, count `A >=u 0`; independently,
// if B is known non-negative, count the unsigned `A >u B`.
if (Info.doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
1737+
UpdateEstimate(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0));
1738+
if (IsKnownNonNegative(B))
1739+
UpdateEstimate(CmpInst::ICMP_UGT, A, B);
1740+
break;
1741+
case CmpInst::ICMP_SGE:
1742+
// Signed A >= B with B known non-negative: count the unsigned `A >=u B`.
if (IsKnownNonNegative(B))
1743+
UpdateEstimate(CmpInst::ICMP_UGE, A, B);
1744+
break;
1745+
}
1746+
}
1747+
1748+
/// Performs a dry run of the transform, computing a conservative estimate of
1749+
/// the total number of rows and columns we need in the underlying storage.
1750+
static std::tuple<State, unsigned, unsigned>
1751+
dryRun(Function &F, DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE) {
16781752
DT.updateDFSNumbers();
16791753
SmallVector<Value *> FunctionArgs;
16801754
for (Value &Arg : F.args())
16811755
FunctionArgs.push_back(&Arg);
1682-
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
16831756
State S(DT, LI, SE);
1684-
std::unique_ptr<Module> ReproducerModule(
1685-
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
1757+
unsigned EstimatedColumns = FunctionArgs.size() + 1;
1758+
1759+
// EstimatedRowsA corresponds to SignedCS, and EstimatedRowsB corresponds to
1760+
// UnsignedCS.
1761+
unsigned EstimatedRowsA = 0, EstimatedRowsB = 1;
1762+
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
16861763

16871764
// First, collect conditions implied by branches and blocks with their
16881765
// Dominator DFS in and out numbers.
@@ -1725,12 +1802,90 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
17251802
return A.NumIn < B.NumIn;
17261803
});
17271804

1805+
for (const FactOrCheck &CB : S.WorkList) {
1806+
ICmpInst::Predicate Pred;
1807+
Value *A, *B;
1808+
if (CB.isCheck()) {
1809+
// What follows is a dry-run of checkOrAndOpImpliedByOther, without
1810+
// assuming that instructions have been simplified, as they would have been
1811+
// during the course of normal operation.
1812+
auto *ContextInst = CB.getContextInst();
1813+
if (auto *Cmp =
1814+
dyn_cast_or_null<ICmpInst>(CB.getInstructionToSimplify())) {
1815+
unsigned OtherOpIdx = ContextInst->getOperand(0) == Cmp ? 1 : 0;
1816+
if (match(ContextInst, m_LogicalOp()) &&
1817+
match(ContextInst->getOperand(OtherOpIdx),
1818+
m_ICmp(Pred, m_Value(A), m_Value(B)))) {
1819+
if (match(ContextInst, m_LogicalOr()))
1820+
Pred = CmpInst::getInversePredicate(Pred);
1821+
dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
1822+
EstimatedColumns);
1823+
}
1824+
}
1825+
continue;
1826+
}
1827+
if (!CB.isConditionFact()) {
1828+
Value *X;
1829+
if (match(CB.Inst, m_Intrinsic<Intrinsic::abs>(m_Value(X)))) {
1830+
if (cast<ConstantInt>(CB.Inst->getOperand(1))->isOne())
1831+
dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst,
1832+
ConstantInt::get(CB.Inst->getType(), 0), Info,
1833+
EstimatedRowsA, EstimatedRowsB, EstimatedColumns);
1834+
dryRunAddFact(CmpInst::ICMP_SGE, CB.Inst, X, Info, EstimatedRowsA,
1835+
EstimatedRowsB, EstimatedColumns);
1836+
continue;
1837+
}
1838+
1839+
if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
1840+
Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
1841+
dryRunAddFact(Pred, MinMax, MinMax->getLHS(), Info, EstimatedRowsA,
1842+
EstimatedRowsB, EstimatedColumns);
1843+
dryRunAddFact(Pred, MinMax, MinMax->getRHS(), Info, EstimatedRowsA,
1844+
EstimatedRowsB, EstimatedColumns);
1845+
continue;
1846+
}
1847+
}
1848+
1849+
if (CB.isConditionFact()) {
1850+
Pred = CB.Cond.Pred;
1851+
A = CB.Cond.Op0;
1852+
B = CB.Cond.Op1;
1853+
} else {
1854+
bool Matched = match(CB.Inst, m_Intrinsic<Intrinsic::assume>(
1855+
m_ICmp(Pred, m_Value(A), m_Value(B))));
1856+
(void)Matched;
1857+
assert(Matched && "Must have an assume intrinsic with a icmp operand");
1858+
}
1859+
dryRunAddFact(Pred, A, B, Info, EstimatedRowsA, EstimatedRowsB,
1860+
EstimatedColumns);
1861+
}
1862+
return {S, std::max(EstimatedRowsA, EstimatedRowsB), EstimatedColumns};
1863+
}
1864+
1865+
static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
1866+
ScalarEvolution &SE,
1867+
OptimizationRemarkEmitter &ORE) {
1868+
bool Changed = false;
1869+
const auto &[S, EstimatedRows, EstimatedColumns] = dryRun(F, DT, LI, SE);
1870+
1871+
// Fail early if estimates exceed limits. Row estimate could be off by up to
1872+
// 40%.
1873+
if (EstimatedRows > 1.4 * MaxRows || EstimatedColumns > MaxColumns)
1874+
return false;
1875+
1876+
SmallVector<Value *> FunctionArgs;
1877+
for (Value &Arg : F.args())
1878+
FunctionArgs.push_back(&Arg);
1879+
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
1880+
std::unique_ptr<Module> ReproducerModule(
1881+
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
1882+
17281883
SmallVector<Instruction *> ToRemove;
17291884

17301885
// Finally, process ordered worklist and eliminate implied conditions.
17311886
SmallVector<StackEntry, 16> DFSInStack;
17321887
SmallVector<ReproducerEntry> ReproducerCondStack;
1733-
for (FactOrCheck &CB : S.WorkList) {
1888+
for (const FactOrCheck &CB : S.WorkList) {
17341889
// First, pop entries from the stack that are out-of-scope for CB. Remove
17351890
// the corresponding entry from the constraint system.
17361891
while (!DFSInStack.empty()) {

llvm/test/Transforms/ConstraintElimination/max-row-limit.ll renamed to llvm/test/Transforms/ConstraintElimination/max-row-column-limit.ll

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2-
; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
3-
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=9 -S %s | FileCheck --check-prefixes=COMMON,SIMP %s
4-
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=COMMON,NOSIMP %s
2+
; RUN: opt -passes=constraint-elimination -S %s | FileCheck --check-prefixes=SIMP %s
3+
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=8 -S %s | FileCheck --check-prefixes=SIMP %s
4+
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=6 -S %s | FileCheck --check-prefixes=SIMP %s
5+
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-rows=7 -S %s | FileCheck --check-prefixes=NOSIMP %s
6+
; RUN: opt -passes=constraint-elimination -constraint-elimination-max-cols=5 -S %s | FileCheck --check-prefixes=NOSIMP %s
57

68

79
define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
@@ -22,7 +24,8 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
2224
; SIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
2325
; SIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
2426
; SIMP: bb5:
25-
; SIMP-NEXT: ret i1 true
27+
; SIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4:%.*]], 100
28+
; SIMP-NEXT: ret i1 [[C5]]
2629
; SIMP: exit:
2730
; SIMP-NEXT: ret i1 false
2831
;
@@ -43,7 +46,7 @@ define i1 @test_max_row_limit(i32 %l0, i32 %l1, i32 %l2, i32 %l3, i32 %l4) {
4346
; NOSIMP-NEXT: [[C4:%.*]] = icmp uge i32 [[L4:%.*]], 100
4447
; NOSIMP-NEXT: br i1 [[C4]], label [[BB5:%.*]], label [[EXIT]]
4548
; NOSIMP: bb5:
46-
; NOSIMP-NEXT: [[C5:%.*]] = icmp uge i32 [[L4]], 100
49+
; NOSIMP-NEXT: [[C5:%.*]] = icmp sge i32 [[L4]], 100
4750
; NOSIMP-NEXT: ret i1 [[C5]]
4851
; NOSIMP: exit:
4952
; NOSIMP-NEXT: ret i1 false
@@ -69,11 +72,9 @@ bb4:
6972
br i1 %c4, label %bb5, label %exit
7073

7174
bb5:
72-
%c5 = icmp uge i32 %l4, 100
75+
%c5 = icmp sge i32 %l4, 100
7376
ret i1 %c5
7477

7578
exit:
7679
ret i1 false
7780
}
78-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
79-
; COMMON: {{.*}}

0 commit comments

Comments
 (0)