Skip to content

Commit dd3507b

Browse files
authored
Extend LVI to cache ranges per BB predecessor. (llvm#159432)
Currently, LVI takes the union of value ranges from block predecessors. Storing the ranges per predecessor instead can yield more restricted resulting ranges and enable additional optimizations. However, this is costly (memory + compile time), so it is placed under a flag that is disabled by default. See: llvm#158139.
1 parent 1ef1175 commit dd3507b

File tree

2 files changed

+235
-3
lines changed

2 files changed

+235
-3
lines changed

llvm/lib/Analysis/LazyValueInfo.cpp

Lines changed: 137 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
5959
INITIALIZE_PASS_END(LazyValueInfoWrapperPass, "lazy-value-info",
6060
"Lazy Value Information Analysis", false, true)
6161

62+
static cl::opt<bool> PerPredRanges(
63+
"lvi-per-pred-ranges", cl::Hidden, cl::init(false),
64+
cl::desc("Enable tracking of ranges for a value in a block for"
65+
"each block predecessor (default = false)"));
66+
6267
namespace llvm {
6368
FunctionPass *createLazyValueInfoPass() {
6469
return new LazyValueInfoWrapperPass();
@@ -103,6 +108,10 @@ namespace {
103108

104109
namespace {
105110
using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;
111+
using BBLatticeElementMap =
112+
SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4>;
113+
using PredecessorValueLatticeMap =
114+
SmallDenseMap<AssertingVH<Value>, BBLatticeElementMap, 2>;
106115

107116
/// This is the cache kept by LazyValueInfo which
108117
/// maintains information about queries across the clients' queries.
@@ -117,6 +126,10 @@ class LazyValueInfoCache {
117126
// std::nullopt indicates that the nonnull pointers for this basic block
118127
// block have not been computed yet.
119128
std::optional<NonNullPointerSet> NonNullPointers;
129+
// This is an extension of the above LatticeElements, caching, for each
130+
// Value, a ValueLatticeElement, for each predecessor of the BB tracked by
131+
// this entry.
132+
std::optional<PredecessorValueLatticeMap> PredecessorLatticeElements;
120133
};
121134

122135
/// Cached information per basic block.
@@ -134,8 +147,14 @@ class LazyValueInfoCache {
134147

135148
BlockCacheEntry *getOrCreateBlockEntry(BasicBlock *BB) {
136149
auto It = BlockCache.find_as(BB);
137-
if (It == BlockCache.end())
138-
It = BlockCache.insert({BB, std::make_unique<BlockCacheEntry>()}).first;
150+
if (It == BlockCache.end()) {
151+
std::unique_ptr<BlockCacheEntry> BCE =
152+
std::make_unique<BlockCacheEntry>();
153+
if (PerPredRanges)
154+
BCE->PredecessorLatticeElements =
155+
std::make_optional<PredecessorValueLatticeMap>();
156+
It = BlockCache.insert({BB, std::move(BCE)}).first;
157+
}
139158

140159
return It->second.get();
141160
}
@@ -161,6 +180,28 @@ class LazyValueInfoCache {
161180
addValueHandle(Val);
162181
}
163182

183+
void insertPredecessorResults(Value *Val, BasicBlock *BB,
184+
BBLatticeElementMap &PredLatticeElements) {
185+
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
186+
187+
Entry->PredecessorLatticeElements->insert({Val, PredLatticeElements});
188+
189+
addValueHandle(Val);
190+
}
191+
192+
std::optional<BBLatticeElementMap>
193+
getCachedPredecessorInfo(Value *V, BasicBlock *BB) const {
194+
const BlockCacheEntry *Entry = getBlockEntry(BB);
195+
if (!Entry)
196+
return std::nullopt;
197+
198+
auto LatticeIt = Entry->PredecessorLatticeElements->find_as(V);
199+
if (LatticeIt == Entry->PredecessorLatticeElements->end())
200+
return std::nullopt;
201+
202+
return LatticeIt->second;
203+
}
204+
164205
std::optional<ValueLatticeElement> getCachedValueInfo(Value *V,
165206
BasicBlock *BB) const {
166207
const BlockCacheEntry *Entry = getBlockEntry(BB);
@@ -216,6 +257,8 @@ void LazyValueInfoCache::eraseValue(Value *V) {
216257
Pair.second->OverDefined.erase(V);
217258
if (Pair.second->NonNullPointers)
218259
Pair.second->NonNullPointers->erase(V);
260+
if (PerPredRanges)
261+
Pair.second->PredecessorLatticeElements->erase(V);
219262
}
220263

221264
auto HandleIt = ValueHandles.find_as(V);
@@ -230,6 +273,10 @@ void LVIValueHandle::deleted() {
230273
}
231274

232275
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
276+
// Clear all when a BB is removed.
277+
if (PerPredRanges)
278+
for (auto &Pair : BlockCache)
279+
Pair.second->PredecessorLatticeElements->clear();
233280
BlockCache.erase(BB);
234281
}
235282

@@ -691,6 +738,9 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
691738
// find a path to function entry. TODO: We should consider explicitly
692739
// canonicalizing to make this true rather than relying on this happy
693740
// accident.
741+
std::optional<BBLatticeElementMap> PredLatticeElements;
742+
if (PerPredRanges)
743+
PredLatticeElements = std::make_optional<BBLatticeElementMap>();
694744
for (BasicBlock *Pred : predecessors(BB)) {
695745
// Skip self loops.
696746
if (Pred == BB)
@@ -710,8 +760,13 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
710760
<< Pred->getName() << "' (non local).\n");
711761
return Result;
712762
}
763+
if (PerPredRanges)
764+
PredLatticeElements->insert({Pred, *EdgeResult});
713765
}
714766

767+
if (PerPredRanges)
768+
TheCache.insertPredecessorResults(Val, BB, *PredLatticeElements);
769+
715770
// Return the merged value, which is more precise than 'overdefined'.
716771
assert(!Result.isOverdefined());
717772
return Result;
@@ -724,6 +779,9 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
724779
// Loop over all of our predecessors, merging what we know from them into
725780
// result. See the comment about the chosen traversal order in
726781
// solveBlockValueNonLocal; the same reasoning applies here.
782+
std::optional<BBLatticeElementMap> PredLatticeElements;
783+
if (PerPredRanges)
784+
PredLatticeElements = std::make_optional<BBLatticeElementMap>();
727785
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
728786
BasicBlock *PhiBB = PN->getIncomingBlock(i);
729787
Value *PhiVal = PN->getIncomingValue(i);
@@ -746,8 +804,14 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
746804

747805
return Result;
748806
}
807+
808+
if (PerPredRanges)
809+
PredLatticeElements->insert({PhiBB, *EdgeResult});
749810
}
750811

812+
if (PerPredRanges)
813+
TheCache.insertPredecessorResults(PN, BB, *PredLatticeElements);
814+
751815
// Return the merged value, which is more precise than 'overdefined'.
752816
assert(!Result.isOverdefined() && "Possible PHI in entry block?");
753817
return Result;
@@ -1002,7 +1066,77 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
10021066

10031067
const ConstantRange &LHSRange = *LHSRes;
10041068
const ConstantRange &RHSRange = *RHSRes;
1005-
return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
1069+
1070+
std::optional<ValueLatticeElement> MergedResult =
1071+
ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
1072+
1073+
if (!PerPredRanges)
1074+
return MergedResult;
1075+
1076+
std::optional<BBLatticeElementMap> PredLHS =
1077+
TheCache.getCachedPredecessorInfo(LHS, BB);
1078+
if (!PredLHS)
1079+
return MergedResult;
1080+
std::optional<BBLatticeElementMap> PredRHS =
1081+
TheCache.getCachedPredecessorInfo(RHS, BB);
1082+
if (!PredRHS)
1083+
return MergedResult;
1084+
1085+
const BBLatticeElementMap &LHSPredMap = *PredLHS;
1086+
const BBLatticeElementMap &RHSPredMap = *PredRHS;
1087+
1088+
BBLatticeElementMap PredLatticeElements;
1089+
ValueLatticeElement OverallPredResult;
1090+
for (auto *Pred : predecessors(BB)) {
1091+
auto LHSIt = LHSPredMap.find_as(Pred);
1092+
if (LHSIt == LHSPredMap.end())
1093+
return MergedResult;
1094+
const ValueLatticeElement &LHSFromPred = LHSIt->second;
1095+
std::optional<ConstantRange> LHSFromPredRes =
1096+
LHSFromPred.asConstantRange(LHS->getType());
1097+
if (!LHSFromPredRes)
1098+
return MergedResult;
1099+
1100+
auto RHSIt = RHSPredMap.find_as(Pred);
1101+
if (RHSIt == RHSPredMap.end())
1102+
return MergedResult;
1103+
const ValueLatticeElement &RHSFromPred = RHSIt->second;
1104+
std::optional<ConstantRange> RHSFromPredRes =
1105+
RHSFromPred.asConstantRange(RHS->getType());
1106+
if (!RHSFromPredRes)
1107+
return MergedResult;
1108+
1109+
const ConstantRange &LHSFromPredRange = *LHSFromPredRes;
1110+
const ConstantRange &RHSFromPredRange = *RHSFromPredRes;
1111+
std::optional<ValueLatticeElement> PredResult =
1112+
ValueLatticeElement::getRange(OpFn(LHSFromPredRange, RHSFromPredRange));
1113+
if (!PredResult)
1114+
return MergedResult;
1115+
if (PredResult->isOverdefined()) {
1116+
LLVM_DEBUG(
1117+
dbgs() << " pred BB '" << Pred->getName() << "' for BB '"
1118+
<< BB->getName()
1119+
<< "' overdefined. Discarding all predecessor intervals.\n");
1120+
return MergedResult;
1121+
}
1122+
PredLatticeElements.insert({Pred, *PredResult});
1123+
OverallPredResult.mergeIn(*PredResult);
1124+
}
1125+
1126+
// If this point is reached, all predecessors for both LHS and RHS have
1127+
// constant ranges previously computed. Can cache result and use the
1128+
// OverallPredResult;
1129+
TheCache.insertPredecessorResults(I, BB, PredLatticeElements);
1130+
1131+
LLVM_DEBUG(dbgs() << " Using predecessor intervals, evaluated " << *I
1132+
<< " to: " << OverallPredResult << ".\n");
1133+
1134+
if (!MergedResult)
1135+
return OverallPredResult;
1136+
1137+
LLVM_DEBUG(dbgs() << " Intersecting intervals for " << *I << ": "
1138+
<< OverallPredResult << " and " << MergedResult << ".\n");
1139+
return MergedResult->intersect(OverallPredResult);
10061140
}
10071141

10081142
std::optional<ValueLatticeElement>
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -passes="correlated-propagation" -S 2>&1 | FileCheck %s
3+
; RUN: opt < %s -passes="correlated-propagation" -lvi-per-pred-ranges -S 2>&1 | FileCheck %s -check-prefix=LVI-PRED-RANGES
4+
5+
@global = external local_unnamed_addr global [4338 x i32], align 16
6+
7+
define dso_local noundef zeroext i1 @bar(i64 noundef %arg, ptr noundef writeonly captures(none) %arg1) local_unnamed_addr {
8+
; CHECK-LABEL: define dso_local noundef zeroext i1 @bar(
9+
; CHECK-SAME: i64 noundef [[ARG:%.*]], ptr noundef writeonly captures(none) [[ARG1:%.*]]) local_unnamed_addr {
10+
; CHECK-NEXT: [[BB:.*]]:
11+
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[ARG]], 1025
12+
; CHECK-NEXT: br i1 [[ICMP]], label %[[BB4:.*]], label %[[BB2:.*]]
13+
; CHECK: [[BB2]]:
14+
; CHECK-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[ARG]], 262145
15+
; CHECK-NEXT: br i1 [[ICMP3]], label %[[BB4]], label %[[BB9:.*]]
16+
; CHECK: [[BB4]]:
17+
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 7, %[[BB]] ], [ 15487, %[[BB2]] ]
18+
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 3, %[[BB]] ], [ 7, %[[BB2]] ]
19+
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI]], [[ARG]]
20+
; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[ADD]], [[PHI5]]
21+
; CHECK-NEXT: [[ICMP6:%.*]] = icmp samesign ult i64 [[LSHR]], 4338
22+
; CHECK-NEXT: br i1 [[ICMP6]], label %[[BB8:.*]], label %[[BB7:.*]]
23+
; CHECK: [[BB7]]:
24+
; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 18)
25+
; CHECK-NEXT: unreachable
26+
; CHECK: [[BB8]]:
27+
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 [[LSHR]]
28+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
29+
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
30+
; CHECK-NEXT: store i64 [[SEXT]], ptr [[ARG1]], align 8
31+
; CHECK-NEXT: br label %[[BB9]]
32+
; CHECK: [[BB9]]:
33+
; CHECK-NEXT: [[PHI10:%.*]] = phi i1 [ true, %[[BB8]] ], [ false, %[[BB2]] ]
34+
; CHECK-NEXT: ret i1 [[PHI10]]
35+
;
36+
; LVI-PRED-RANGES-LABEL: define dso_local noundef zeroext i1 @bar(
37+
; LVI-PRED-RANGES-SAME: i64 noundef [[ARG:%.*]], ptr noundef writeonly captures(none) [[ARG1:%.*]]) local_unnamed_addr {
38+
; LVI-PRED-RANGES-NEXT: [[BB:.*]]:
39+
; LVI-PRED-RANGES-NEXT: [[ICMP:%.*]] = icmp ult i64 [[ARG]], 1025
40+
; LVI-PRED-RANGES-NEXT: br i1 [[ICMP]], label %[[BB4:.*]], label %[[BB2:.*]]
41+
; LVI-PRED-RANGES: [[BB2]]:
42+
; LVI-PRED-RANGES-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[ARG]], 262145
43+
; LVI-PRED-RANGES-NEXT: br i1 [[ICMP3]], label %[[BB4]], label %[[BB9:.*]]
44+
; LVI-PRED-RANGES: [[BB4]]:
45+
; LVI-PRED-RANGES-NEXT: [[PHI:%.*]] = phi i64 [ 7, %[[BB]] ], [ 15487, %[[BB2]] ]
46+
; LVI-PRED-RANGES-NEXT: [[PHI5:%.*]] = phi i64 [ 3, %[[BB]] ], [ 7, %[[BB2]] ]
47+
; LVI-PRED-RANGES-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI]], [[ARG]]
48+
; LVI-PRED-RANGES-NEXT: [[LSHR:%.*]] = lshr i64 [[ADD]], [[PHI5]]
49+
; LVI-PRED-RANGES-NEXT: br i1 true, label %[[BB8:.*]], label %[[BB7:.*]]
50+
; LVI-PRED-RANGES: [[BB7]]:
51+
; LVI-PRED-RANGES-NEXT: tail call void @llvm.ubsantrap(i8 18)
52+
; LVI-PRED-RANGES-NEXT: unreachable
53+
; LVI-PRED-RANGES: [[BB8]]:
54+
; LVI-PRED-RANGES-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 [[LSHR]]
55+
; LVI-PRED-RANGES-NEXT: [[LOAD:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
56+
; LVI-PRED-RANGES-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
57+
; LVI-PRED-RANGES-NEXT: store i64 [[SEXT]], ptr [[ARG1]], align 8
58+
; LVI-PRED-RANGES-NEXT: br label %[[BB9]]
59+
; LVI-PRED-RANGES: [[BB9]]:
60+
; LVI-PRED-RANGES-NEXT: [[PHI10:%.*]] = phi i1 [ true, %[[BB8]] ], [ false, %[[BB2]] ]
61+
; LVI-PRED-RANGES-NEXT: ret i1 [[PHI10]]
62+
;
63+
bb:
64+
%icmp = icmp ult i64 %arg, 1025
65+
br i1 %icmp, label %bb4, label %bb2
66+
67+
bb2: ; preds = %bb
68+
%icmp3 = icmp ult i64 %arg, 262145
69+
br i1 %icmp3, label %bb4, label %bb9
70+
71+
bb4: ; preds = %bb2, %bb
72+
%phi = phi i64 [ 7, %bb ], [ 15487, %bb2 ]
73+
%phi5 = phi i64 [ 3, %bb ], [ 7, %bb2 ]
74+
%add = add nuw nsw i64 %phi, %arg
75+
%lshr = lshr i64 %add, %phi5
76+
%icmp6 = icmp samesign ult i64 %lshr, 4338
77+
br i1 %icmp6, label %bb8, label %bb7
78+
79+
bb7: ; preds = %bb4
80+
tail call void @llvm.ubsantrap(i8 18)
81+
unreachable
82+
83+
bb8: ; preds = %bb4
84+
%getelementptr = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 %lshr
85+
%load = load i32, ptr %getelementptr, align 4
86+
%sext = sext i32 %load to i64
87+
store i64 %sext, ptr %arg1, align 8
88+
br label %bb9
89+
90+
bb9: ; preds = %bb8, %bb2
91+
%phi10 = phi i1 [ true, %bb8 ], [ false, %bb2 ]
92+
ret i1 %phi10
93+
}
94+
95+
; Function Attrs: cold noreturn nounwind
96+
declare void @llvm.ubsantrap(i8 immarg) #0
97+
98+
attributes #0 = { cold noreturn nounwind }

0 commit comments

Comments
 (0)