Skip to content

Commit 89f53af

Browse files
authored
[ConstraintElim] Use constraints from bounded memory accesses (#155253)
This patch removes bounds checks that are dominated by bounded memory accesses. For example, given an array `int A[5]`, once the access `A[idx]` has executed successfully, we know that `idx u< 5` holds after the load. Compile-time impact (+0.1%): https://llvm-compile-time-tracker.com/compare.php?from=f0e9bba024d44b55d54b02025623ce4a3ba5a37c&to=5227b08a4a514159ec524d1b1ca18ed8ab5407df&stat=instructions%3Au. llvm-opt-benchmark: dtcxzyw/llvm-opt-benchmark#2709. Proof: https://alive2.llvm.org/ce/z/JEyjA2
1 parent 417bdb6 commit 89f53af

File tree

2 files changed

+478
-7
lines changed

2 files changed

+478
-7
lines changed

llvm/lib/Transforms/Scalar/ConstraintElimination.cpp

Lines changed: 105 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919
#include "llvm/Analysis/ConstraintSystem.h"
2020
#include "llvm/Analysis/GlobalsModRef.h"
2121
#include "llvm/Analysis/LoopInfo.h"
22+
#include "llvm/Analysis/MemoryBuiltins.h"
2223
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
2324
#include "llvm/Analysis/ScalarEvolution.h"
2425
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26+
#include "llvm/Analysis/TargetLibraryInfo.h"
2527
#include "llvm/Analysis/ValueTracking.h"
2628
#include "llvm/IR/DataLayout.h"
2729
#include "llvm/IR/DebugInfo.h"
@@ -170,10 +172,12 @@ struct State {
170172
DominatorTree &DT;
171173
LoopInfo &LI;
172174
ScalarEvolution &SE;
175+
TargetLibraryInfo &TLI;
173176
SmallVector<FactOrCheck, 64> WorkList;
174177

175-
State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE)
176-
: DT(DT), LI(LI), SE(SE) {}
178+
State(DominatorTree &DT, LoopInfo &LI, ScalarEvolution &SE,
179+
TargetLibraryInfo &TLI)
180+
: DT(DT), LI(LI), SE(SE), TLI(TLI) {}
177181

178182
/// Process block \p BB and add known facts to work-list.
179183
void addInfoFor(BasicBlock &BB);
@@ -1109,10 +1113,50 @@ void State::addInfoForInductions(BasicBlock &BB) {
11091113
}
11101114
}
11111115

1116+
/// Try to derive an upper bound for the single variable index of \p GEP from
/// the fact that an access of \p AccessSize bytes through \p GEP executes.
///
/// On success, returns true and sets \p Pred, \p A and \p B such that
/// `A Pred B` reads `Index u<= MaxIndex`, where MaxIndex is the largest index
/// for which `Index * Scale + ConstOffset + AccessSize <= AllocSize`.
///
/// Returns false (leaving the outputs untouched) if:
///  * the collected offsets are not known to be free of unsigned wrap,
///  * the GEP does not have exactly one variable offset,
///  * the index type's width differs from the offset width, or
///  * the size of the base object is unknown or scalable.
static bool getConstraintFromMemoryAccess(GetElementPtrInst &GEP,
                                          uint64_t AccessSize,
                                          CmpPredicate &Pred, Value *&A,
                                          Value *&B, const DataLayout &DL,
                                          const TargetLibraryInfo &TLI) {
  auto Offset = collectOffsets(cast<GEPOperator>(GEP), DL);
  if (!Offset.NW.hasNoUnsignedWrap())
    return false;

  if (Offset.VariableOffsets.size() != 1)
    return false;

  uint64_t BitWidth = Offset.ConstantOffset.getBitWidth();
  auto &[Index, Scale] = Offset.VariableOffsets.front();
  // Bail out on non-canonical GEPs whose index type is narrower or wider than
  // the offset width. MaxIndex below is computed with BitWidth bits, and
  // materializing it with Index's type would otherwise create a constant
  // whose width does not match that type.
  if (Index->getType()->getScalarSizeInBits() != BitWidth)
    return false;

  ObjectSizeOpts Opts;
  // Workaround for gep inbounds, ptr null, idx.
  Opts.NullIsUnknownSize = true;
  // Be conservative since we are not clear on whether an out of bounds access
  // to the padding is UB or not.
  Opts.RoundToAlign = true;
  std::optional<TypeSize> Size =
      getBaseObjectSize(Offset.BasePtr, DL, &TLI, Opts);
  if (!Size || Size->isScalable())
    return false;

  // Index * Scale + ConstOffset + AccessSize <= AllocSize
  // With nuw flag, we know that the index addition doesn't have unsigned wrap.
  // If (AllocSize - (ConstOffset + AccessSize)) wraps around, there is no valid
  // value for Index.
  APInt MaxIndex = (APInt(BitWidth, Size->getFixedValue() - AccessSize,
                          /*isSigned=*/false, /*implicitTrunc=*/true) -
                    Offset.ConstantOffset)
                       .udiv(Scale);
  Pred = ICmpInst::ICMP_ULE;
  A = Index;
  B = ConstantInt::get(Index->getType(), MaxIndex);
  return true;
}
1154+
11121155
void State::addInfoFor(BasicBlock &BB) {
11131156
addInfoForInductions(BB);
1157+
auto &DL = BB.getDataLayout();
11141158

1115-
// True as long as long as the current instruction is guaranteed to execute.
1159+
// True as long as the current instruction is guaranteed to execute.
11161160
bool GuaranteedToExecute = true;
11171161
// Queue conditions and assumes.
11181162
for (Instruction &I : BB) {
@@ -1127,6 +1171,38 @@ void State::addInfoFor(BasicBlock &BB) {
11271171
continue;
11281172
}
11291173

1174+
auto AddFactFromMemoryAccess = [&](Value *Ptr, Type *AccessType) {
1175+
auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1176+
if (!GEP)
1177+
return;
1178+
TypeSize AccessSize = DL.getTypeStoreSize(AccessType);
1179+
if (!AccessSize.isFixed())
1180+
return;
1181+
if (GuaranteedToExecute) {
1182+
CmpPredicate Pred;
1183+
Value *A, *B;
1184+
if (getConstraintFromMemoryAccess(*GEP, AccessSize.getFixedValue(),
1185+
Pred, A, B, DL, TLI)) {
1186+
// The memory access is guaranteed to execute when BB is entered,
1187+
// hence the constraint holds on entry to BB.
1188+
WorkList.emplace_back(FactOrCheck::getConditionFact(
1189+
DT.getNode(I.getParent()), Pred, A, B));
1190+
}
1191+
} else {
1192+
WorkList.emplace_back(
1193+
FactOrCheck::getInstFact(DT.getNode(I.getParent()), &I));
1194+
}
1195+
};
1196+
1197+
if (auto *LI = dyn_cast<LoadInst>(&I)) {
1198+
if (!LI->isVolatile())
1199+
AddFactFromMemoryAccess(LI->getPointerOperand(), LI->getAccessType());
1200+
}
1201+
if (auto *SI = dyn_cast<StoreInst>(&I)) {
1202+
if (!SI->isVolatile())
1203+
AddFactFromMemoryAccess(SI->getPointerOperand(), SI->getAccessType());
1204+
}
1205+
11301206
auto *II = dyn_cast<IntrinsicInst>(&I);
11311207
Intrinsic::ID ID = II ? II->getIntrinsicID() : Intrinsic::not_intrinsic;
11321208
switch (ID) {
@@ -1420,7 +1496,7 @@ static std::optional<bool> checkCondition(CmpInst::Predicate Pred, Value *A,
14201496
LLVM_DEBUG(dbgs() << "Checking " << *CheckInst << "\n");
14211497

14221498
auto R = Info.getConstraintForSolving(Pred, A, B);
1423-
if (R.empty() || !R.isValid(Info)){
1499+
if (R.empty() || !R.isValid(Info)) {
14241500
LLVM_DEBUG(dbgs() << " failed to decompose condition\n");
14251501
return std::nullopt;
14261502
}
@@ -1785,12 +1861,13 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
17851861

17861862
static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
17871863
ScalarEvolution &SE,
1788-
OptimizationRemarkEmitter &ORE) {
1864+
OptimizationRemarkEmitter &ORE,
1865+
TargetLibraryInfo &TLI) {
17891866
bool Changed = false;
17901867
DT.updateDFSNumbers();
17911868
SmallVector<Value *> FunctionArgs(llvm::make_pointer_range(F.args()));
17921869
ConstraintInfo Info(F.getDataLayout(), FunctionArgs);
1793-
State S(DT, LI, SE);
1870+
State S(DT, LI, SE, TLI);
17941871
std::unique_ptr<Module> ReproducerModule(
17951872
DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
17961873

@@ -1960,6 +2037,26 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT, LoopInfo &LI,
19602037
}
19612038
continue;
19622039
}
2040+
2041+
auto &DL = F.getDataLayout();
2042+
auto AddFactsAboutIndices = [&](Value *Ptr, Type *AccessType) {
2043+
CmpPredicate Pred;
2044+
Value *A, *B;
2045+
if (getConstraintFromMemoryAccess(
2046+
*cast<GetElementPtrInst>(Ptr),
2047+
DL.getTypeStoreSize(AccessType).getFixedValue(), Pred, A, B, DL,
2048+
TLI))
2049+
AddFact(Pred, A, B);
2050+
};
2051+
2052+
if (auto *LI = dyn_cast<LoadInst>(CB.Inst)) {
2053+
AddFactsAboutIndices(LI->getPointerOperand(), LI->getAccessType());
2054+
continue;
2055+
}
2056+
if (auto *SI = dyn_cast<StoreInst>(CB.Inst)) {
2057+
AddFactsAboutIndices(SI->getPointerOperand(), SI->getAccessType());
2058+
continue;
2059+
}
19632060
}
19642061

19652062
Value *A = nullptr, *B = nullptr;
@@ -2018,7 +2115,8 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F,
20182115
auto &LI = AM.getResult<LoopAnalysis>(F);
20192116
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
20202117
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2021-
if (!eliminateConstraints(F, DT, LI, SE, ORE))
2118+
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
2119+
if (!eliminateConstraints(F, DT, LI, SE, ORE, TLI))
20222120
return PreservedAnalyses::all();
20232121

20242122
PreservedAnalyses PA;

0 commit comments

Comments
 (0)